<!-- CINXE.COM -->
<!-- Search | arXiv e-print repository -->
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <!-- new favicon config and versions by realfavicongenerator.net -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <!-- Referenced by absolute URL like the other icon assets; a relative path would resolve against the page URL (/search/).
       NOTE(review): confirm browserconfig.xml is deployed in this icons directory. -->
  <meta name="msapplication-config" content="https://static.arxiv.org/static/base/1.0.0a5/images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">
  <!-- end favicon config -->

  <title>Search | arXiv e-print repository</title>
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css">

  <!-- MathJax 2 configuration. TeX input, HTML-CSS output; $...$ and \(...\) delimit inline math,
       $$...$$ and \[...\] delimit display math. ignoreClass matches every class and processClass
       matches classes beginning with "mathjax", so typesetting is opted into per element. -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: {
          inlineDelimiters: ["$","$"],
          multiLine: false,
          style: {
            "font-size": "normal",
            "border": ""
          }
        }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <script
src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <!-- MathJax above is requested over https explicitly, like every other static.arxiv.org asset on this page. -->
  <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css">
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css">
  <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
  <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
  <style>
    /* NOTE(review): "radio" is not an HTML element name, so this type selector matches nothing as written; confirm the intended target. */
    radio#cf-customfield_11400 { display: none; }
  </style>
</head>
<body>
<header><a href="#main-container" class="is-sr-only">Skip to main content</a>
  <!-- contains Cornell logo and sponsor statement -->
  <div class="attribution level is-marginless" role="banner">
    <div class="level-left">
      <!-- The logo image is the only content of the link, so its alt text names the link; no aria-label is set on the image because that would replace the alt text. -->
      <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200"></a>
    </div>
    <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div>
  </div>
  <!-- contains arXiv identity and search bar -->
  <div class="identity level is-marginless">
    <div class="level-left">
      <div class="level-item">
        <!-- The image is described by its alt text; no aria-label is set on it because that would replace the alt text. -->
        <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo">
          <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;">
        </a>
      </div>
    </div>
    <div class="search-block level-right">
      <!-- Compact header search: a GET form that sends query, searchtype and source=header to https://arxiv.org/search. -->
      <form class="level-item mini-search" method="GET" action="https://arxiv.org/search">
        <div class="field has-addons">
          <div class="control">
            <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms">
            <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
          </div>
          <div class="control">
            <div class="select is-small">
              <select name="searchtype" aria-label="Field to search">
                <option value="all" selected>All fields</option>
                <option value="title">Title</option>
                <option value="author">Author</option>
                <option value="abstract">Abstract</option>
                <option value="comments">Comments</option>
                <option value="journal_ref">Journal reference</option>
                <option value="acm_class">ACM classification</option>
                <option value="msc_class">MSC classification</option>
                <option value="report_num">Report number</option>
                <option value="paper_id">arXiv identifier</option>
                <option value="doi">DOI</option>
                <option value="orcid">ORCID</option>
                <option value="author_id">arXiv author ID</option>
                <option value="help">Help pages</option>
                <option value="full_text">Full text</option>
              </select>
            </div>
          </div>
          <input type="hidden" name="source" value="header">
          <button class="button is-small is-cul-darker" type="submit">Search</button>
        </div>
      </form>
    </div>
  </div>
  <!-- closes identity -->
  <div class="container">
    <div class="user-tools is-size-7 has-text-right has-text-weight-bold"
role="navigation" aria-label="User menu">
      <a href="https://arxiv.org/login">Login</a>
    </div>
  </div>
</header>
<main class="container" id="main-container">
  <div class="level is-marginless">
    <div class="level-left">
      <h1 class="title is-clearfix">
        Showing 1–50 of 1,083 results for author: <span class="mathjax">Le, T</span>
      </h1>
    </div>
    <div class="level-right is-hidden-mobile">
      <!-- feedback for mobile is moved to footer -->
      <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span>
    </div>
  </div>
  <div class="content">
    <!-- Main search form (GET to /search/). role="search" marks it as the page's search landmark for assistive technology. -->
    <form method="GET" action="/search/" role="search">
      <div class="field has-addons-tablet">
        <div class="control is-expanded">
          <label for="query" class="hidden-label">Search term or terms</label>
          <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Le, T">
        </div>
        <div class="select control is-medium">
          <label class="is-hidden" for="searchtype">Field</label>
          <select class="is-medium" id="searchtype" name="searchtype">
            <option value="all">All fields</option>
            <option value="title">Title</option>
            <option selected value="author">Author(s)</option>
            <option value="abstract">Abstract</option>
            <option value="comments">Comments</option>
            <option value="journal_ref">Journal reference</option>
            <option value="acm_class">ACM classification</option>
            <option value="msc_class">MSC classification</option>
            <option value="report_num">Report number</option>
            <option value="paper_id">arXiv identifier</option>
            <option value="doi">DOI</option>
            <option value="orcid">ORCID</option>
            <option value="license">License (URI)</option>
            <option value="author_id">arXiv author ID</option>
            <option value="help">Help pages</option>
            <option value="full_text">Full text</option>
          </select>
        </div>
        <div class="control">
          <button class="button is-link is-medium" type="submit">Search</button>
        </div>
      </div>
      <div class="field">
        <div class="control is-size-7">
          <label class="radio">
            <input
checked id="abstracts-0" name="abstracts" type="radio" value="show">
            Show abstracts
          </label>
          <label class="radio">
            <input id="abstracts-1" name="abstracts" type="radio" value="hide">
            Hide abstracts
          </label>
        </div>
      </div>
      <div class="is-clearfix" style="height: 2.5em">
        <div class="is-pulled-right">
          <a href="/search/advanced?terms-0-term=Le%2C+T&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a>
        </div>
      </div>
      <!-- hidden fields: submitted with the search as order and size -->
      <input type="hidden" name="order" value="-announced_date_first">
      <input type="hidden" name="size" value="50">
    </form>
    <div class="level breathe-horizontal">
      <div class="level-left">
        <!-- Page-size / sort-order form (GET to /search/). The display:none wrapper holds copies of the
             search fields (searchtype, query, abstracts) so they are submitted together with size and order.
             NOTE(review): these copies reuse ids that the main search form also declares
             (searchtype, query, abstracts-0, abstracts-1); ids are meant to be unique per document. -->
        <form method="GET" action="/search/">
          <div style="display: none;">
            <select id="searchtype" name="searchtype">
              <option value="all">All fields</option>
              <option value="title">Title</option>
              <option selected value="author">Author(s)</option>
              <option value="abstract">Abstract</option>
              <option value="comments">Comments</option>
              <option value="journal_ref">Journal reference</option>
              <option value="acm_class">ACM classification</option>
              <option value="msc_class">MSC classification</option>
              <option value="report_num">Report number</option>
              <option value="paper_id">arXiv identifier</option>
              <option value="doi">DOI</option>
              <option value="orcid">ORCID</option>
              <option value="license">License (URI)</option>
              <option value="author_id">arXiv author ID</option>
              <option value="help">Help pages</option>
              <option value="full_text">Full text</option>
            </select>
            <input id="query" name="query" type="text" value="Le, T">
            <ul id="abstracts">
              <li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li>
              <li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li>
            </ul>
          </div>
          <div class="box field is-grouped is-grouped-multiline level-item">
            <div class="control">
              <span class="select is-small">
                <select id="size" name="size">
                  <option value="25">25</option>
                  <option selected
value="50">50</option>
                  <option value="100">100</option>
                  <option value="200">200</option>
                </select>
              </span>
              <label for="size">results per page</label>.
            </div>
            <div class="control">
              <label for="order">Sort results by</label>
              <span class="select is-small">
                <select id="order" name="order">
                  <option selected value="-announced_date_first">Announcement date (newest first)</option>
                  <option value="announced_date_first">Announcement date (oldest first)</option>
                  <option value="-submitted_date">Submission date (newest first)</option>
                  <option value="submitted_date">Submission date (oldest first)</option>
                  <option value="">Relevance</option>
                </select>
              </span>
            </div>
            <div class="control">
              <button class="button is-small is-link" type="submit">Go</button>
            </div>
          </div>
        </form>
      </div>
    </div>
    <!-- Pagination. Only the link for the page being shown (class is-current) carries aria-current="page";
         the remaining links are labelled "Goto page N". The start parameter is the zero-based offset of the
         first result on that page (50 results per page here). -->
    <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination">
      <a href="" class="pagination-previous is-invisible">Previous </a>
      <a href="/search/?searchtype=author&query=Le%2C+T&start=50" class="pagination-next">Next </a>
      <ul class="pagination-list">
        <li>
          <a href="/search/?searchtype=author&query=Le%2C+T&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a>
        </li>
        <li>
          <a href="/search/?searchtype=author&query=Le%2C+T&start=50" class="pagination-link" aria-label="Goto page 2">2 </a>
        </li>
        <li>
          <a href="/search/?searchtype=author&query=Le%2C+T&start=100" class="pagination-link" aria-label="Goto page 3">3 </a>
        </li>
        <li>
          <a href="/search/?searchtype=author&query=Le%2C+T&start=150" class="pagination-link" aria-label="Goto page 4">4 </a>
        </li>
        <li>
          <a href="/search/?searchtype=author&query=Le%2C+T&start=200" class="pagination-link" aria-label="Goto page 5">5 </a>
        </li>
        <li><span class="pagination-ellipsis">…</span></li>
      </ul>
    </nav>
    <ol class="breathe-horizontal" start="1">
      <li class="arxiv-result">
        <div class="is-marginless">
          <p class="list-title is-inline-block"><a
href="https://arxiv.org/abs/2411.17046">arXiv:2411.17046</a> <span> [<a href="https://arxiv.org/pdf/2411.17046">pdf</a>, <a href="https://arxiv.org/format/2411.17046">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Large-Scale Data-Free Knowledge Distillation for ImageNet via Multi-Resolution Data Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Tran%2C+M">Minh-Tuan Tran</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Trung Le</a>, <a href="/search/?searchtype=author&query=Le%2C+X">Xuan-May Le</a>, <a href="/search/?searchtype=author&query=Cai%2C+J">Jianfei Cai</a>, <a href="/search/?searchtype=author&query=Harandi%2C+M">Mehrtash Harandi</a>, <a href="/search/?searchtype=author&query=Phung%2C+D">Dinh Phung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.17046v1-abstract-short" style="display: inline;"> Data-Free Knowledge Distillation (DFKD) is an advanced technique that enables knowledge transfer from a teacher model to a student model without relying on original training data. While DFKD methods have achieved success on smaller datasets like CIFAR10 and CIFAR100, they encounter challenges on larger, high-resolution datasets such as ImageNet. 
A primary issue with previous approaches is their ge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17046v1-abstract-full').style.display = 'inline'; document.getElementById('2411.17046v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.17046v1-abstract-full" style="display: none;"> Data-Free Knowledge Distillation (DFKD) is an advanced technique that enables knowledge transfer from a teacher model to a student model without relying on original training data. While DFKD methods have achieved success on smaller datasets like CIFAR10 and CIFAR100, they encounter challenges on larger, high-resolution datasets such as ImageNet. A primary issue with previous approaches is their generation of synthetic images at high resolutions (e.g., $224 \times 224$) without leveraging information from real images, often resulting in noisy images that lack essential class-specific features in large datasets. Additionally, the computational cost of generating the extensive data needed for effective knowledge transfer can be prohibitive. In this paper, we introduce MUlti-reSolution data-freE (MUSE) to address these limitations. MUSE generates images at lower resolutions while using Class Activation Maps (CAMs) to ensure that the generated images retain critical, class-specific features. To further enhance model diversity, we propose multi-resolution generation and embedding diversity techniques that strengthen latent space representations, leading to significant performance improvements. Experimental results demonstrate that MUSE achieves state-of-the-art performance across both small- and large-scale datasets, with notable performance gains of up to two digits in nearly all ImageNet and subset experiments. Code is available at https://github.com/tmtuan1307/muse. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.17046v1-abstract-full').style.display = 'none'; document.getElementById('2411.17046v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.16935">arXiv:2411.16935</a> <span> [<a href="https://arxiv.org/pdf/2411.16935">pdf</a>, <a href="https://arxiv.org/format/2411.16935">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Classical Analysis and ODEs">math.CA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Metric Geometry">math.MG</span> </div> </div> <p class="title is-5 mathjax"> Buffon Needle Problem Over Convex Sets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Dannenberg%2C+M">M. Dannenberg</a>, <a href="/search/?searchtype=author&query=Hagerstrom%2C+W">W. Hagerstrom</a>, <a href="/search/?searchtype=author&query=Hart%2C+G">G. Hart</a>, <a href="/search/?searchtype=author&query=Iosevich%2C+A">A. Iosevich</a>, <a href="/search/?searchtype=author&query=Le%2C+T">T. Le</a>, <a href="/search/?searchtype=author&query=Li%2C+I">I. Li</a>, <a href="/search/?searchtype=author&query=Skerrett%2C+N">N. Skerrett</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.16935v1-abstract-short" style="display: inline;"> We solve a variant of the classical Buffon Needle problem. 
More specifically, we inspect the probability that a randomly oriented needle of length $l$ originating in a bounded convex set $X\subset\mathbb{R}^2$ lies entirely within $X$. Using techniques from convex geometry, we prove an isoperimetric type inequality, showing that among sets $X$ with equal perimeter, the disk maximizes this probabil… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16935v1-abstract-full').style.display = 'inline'; document.getElementById('2411.16935v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.16935v1-abstract-full" style="display: none;"> We solve a variant of the classical Buffon Needle problem. More specifically, we inspect the probability that a randomly oriented needle of length $l$ originating in a bounded convex set $X\subset\mathbb{R}^2$ lies entirely within $X$. Using techniques from convex geometry, we prove an isoperimetric type inequality, showing that among sets $X$ with equal perimeter, the disk maximizes this probability. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.16935v1-abstract-full').style.display = 'none'; document.getElementById('2411.16935v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 42B10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10657">arXiv:2411.10657</a> <span> [<a href="https://arxiv.org/pdf/2411.10657">pdf</a>, <a href="https://arxiv.org/format/2411.10657">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Brain-to-Text Decoding with Context-Aware Neural Representations and Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Li%2C+J">Jingyuan Li</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Trung Le</a>, <a href="/search/?searchtype=author&query=Fan%2C+C">Chaofei Fan</a>, <a href="/search/?searchtype=author&query=Chen%2C+M">Mingfei Chen</a>, <a href="/search/?searchtype=author&query=Shlizerman%2C+E">Eli Shlizerman</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10657v1-abstract-short" style="display: inline;"> Decoding attempted speech from neural activity offers a promising avenue for restoring communication abilities in individuals with speech impairments. Previous studies have focused on mapping neural activity to text using phonemes as the intermediate target. 
While successful, decoding neural activity directly to phonemes ignores the context dependent nature of the neural activity-to-phoneme mappin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10657v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10657v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10657v1-abstract-full" style="display: none;"> Decoding attempted speech from neural activity offers a promising avenue for restoring communication abilities in individuals with speech impairments. Previous studies have focused on mapping neural activity to text using phonemes as the intermediate target. While successful, decoding neural activity directly to phonemes ignores the context dependent nature of the neural activity-to-phoneme mapping in the brain, leading to suboptimal decoding performance. In this work, we propose the use of diphone - an acoustic representation that captures the transitions between two phonemes - as the context-aware modeling target. We integrate diphones into existing phoneme decoding frameworks through a novel divide-and-conquer strategy in which we model the phoneme distribution by marginalizing over the diphone distribution. Our approach effectively leverages the enhanced context-aware representation of diphones while preserving the manageable class size of phonemes, a key factor in simplifying the subsequent phoneme-to-text conversion task. We demonstrate the effectiveness of our approach on the Brain-to-Text 2024 benchmark, where it achieves state-of-the-art Phoneme Error Rate (PER) of 15.34% compared to 16.62% PER of monophone-based decoding. When coupled with finetuned Large Language Models (LLMs), our method yields a Word Error Rate (WER) of 5.77%, significantly outperforming the 8.93% WER of the leading method in the benchmark. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10657v1-abstract-full').style.display = 'none'; document.getElementById('2411.10657v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10298">arXiv:2411.10298</a> <span> [<a href="https://arxiv.org/pdf/2411.10298">pdf</a>, <a href="https://arxiv.org/format/2411.10298">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Unveiling Topological Structures in Text: A Comprehensive Survey of Topological Data Analysis Applications in NLP </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Uchendu%2C+A">Adaku Uchendu</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Thai Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10298v1-abstract-short" style="display: inline;"> The surge of data available on the internet has led to the adoption of various computational methods to analyze and extract valuable insights from this wealth of information. Among these, the field of Machine Learning (ML) has thrived by leveraging data to extract meaningful insights. 
However, ML techniques face notable challenges when dealing with real-world data, often due to issues of imbalance… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10298v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10298v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10298v1-abstract-full" style="display: none;"> The surge of data available on the internet has led to the adoption of various computational methods to analyze and extract valuable insights from this wealth of information. Among these, the field of Machine Learning (ML) has thrived by leveraging data to extract meaningful insights. However, ML techniques face notable challenges when dealing with real-world data, often due to issues of imbalance, noise, insufficient labeling, and high dimensionality. To address these limitations, some researchers advocate for the adoption of Topological Data Analysis (TDA), a statistical approach that discerningly captures the intrinsic shape of data despite noise. Despite its potential, TDA has not gained as much traction within the Natural Language Processing (NLP) domain compared to structurally distinct areas like computer vision. Nevertheless, a dedicated community of researchers has been exploring the application of TDA in NLP, yielding 85 papers we comprehensively survey in this paper. Our findings categorize these efforts into theoretical and nontheoretical approaches. Theoretical approaches aim to explain linguistic phenomena from a topological viewpoint, while non-theoretical approaches merge TDA with ML features, utilizing diverse numerical representation techniques. We conclude by exploring the challenges and unresolved questions that persist in this niche field. Resources and a list of papers on this topic can be found at: https://github.com/AdaUchendu/AwesomeTDA4NLP. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10298v1-abstract-full').style.display = 'none'; document.getElementById('2411.10298v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.09510">arXiv:2411.09510</a> <span> [<a href="https://arxiv.org/pdf/2411.09510">pdf</a>, <a href="https://arxiv.org/format/2411.09510">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Communication Compression for Tensor Parallel LLM Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Hansen-Palmus%2C+J">Jan Hansen-Palmus</a>, <a href="/search/?searchtype=author&query=Le%2C+M+T">Michael Truong Le</a>, <a href="/search/?searchtype=author&query=Hausd%C3%B6rfer%2C+O">Oliver Hausdörfer</a>, <a href="/search/?searchtype=author&query=Verma%2C+A">Alok Verma</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.09510v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have pushed the frontier of artificial intelligence but are comprised of hundreds of billions of parameters and operations. 
For faster inference latency, LLMs are deployed on multiple hardware accelerators through various Model Parallelism strategies. Our paper looks into the details on one such strategy - Tensor Parallel - and proposes to reduce latency by compressing… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09510v2-abstract-full').style.display = 'inline'; document.getElementById('2411.09510v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.09510v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have pushed the frontier of artificial intelligence but are comprised of hundreds of billions of parameters and operations. For faster inference latency, LLMs are deployed on multiple hardware accelerators through various Model Parallelism strategies. Our paper looks into the details on one such strategy - Tensor Parallel - and proposes to reduce latency by compressing inter-accelerator communication. We leverage fine grained quantization techniques to compress selected activations by 3.5 - 4.5x. Our proposed method leads up to 2x reduction of time-to-first-token (TTFT) with negligible model performance degradation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.09510v2-abstract-full').style.display = 'none'; document.getElementById('2411.09510v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.06738">arXiv:2411.06738</a> <span> [<a href="https://arxiv.org/pdf/2411.06738">pdf</a>, <a href="https://arxiv.org/format/2411.06738">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> 360-Degree Video Super Resolution and Quality Enhancement Challenge: Methods and Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Telili%2C+A">Ahmed Telili</a>, <a href="/search/?searchtype=author&query=Hamidouche%2C+W">Wassim Hamidouche</a>, <a href="/search/?searchtype=author&query=Farhat%2C+I">Ibrahim Farhat</a>, <a href="/search/?searchtype=author&query=Amirpour%2C+H">Hadi Amirpour</a>, <a href="/search/?searchtype=author&query=Timmerer%2C+C">Christian Timmerer</a>, <a href="/search/?searchtype=author&query=Khadraoui%2C+I">Ibrahim Khadraoui</a>, <a href="/search/?searchtype=author&query=Lu%2C+J">Jiajie Lu</a>, <a href="/search/?searchtype=author&query=Van+Le%2C+T">The Van Le</a>, <a href="/search/?searchtype=author&query=Baek%2C+J">Jeonneung Baek</a>, <a href="/search/?searchtype=author&query=Lee%2C+J+Y">Jin Young Lee</a>, <a href="/search/?searchtype=author&query=Wei%2C+Y">Yiying Wei</a>, <a href="/search/?searchtype=author&query=Sun%2C+X">Xiaopeng Sun</a>, <a href="/search/?searchtype=author&query=Gao%2C+Y">Yu Gao</a>, <a href="/search/?searchtype=author&query=Huangl%2C+J">JianCheng Huangl</a>, <a href="/search/?searchtype=author&query=Zhong%2C+Y">Yujie Zhong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.06738v1-abstract-short" style="display: inline;"> Omnidirectional (360-degree) 
video is rapidly gaining popularity due to advancements in immersive technologies like virtual reality (VR) and extended reality (XR). However, real-time streaming of such videos, especially in live mobile scenarios like unmanned aerial vehicles (UAVs), is challenged by limited bandwidth and strict latency constraints. Traditional methods, such as compression and adapt… <!-- Abstract toggle: "More" shows the "abstract-full" span and hides the "abstract-short" preview; "Less" does the reverse. Both controls stay <a> elements so their existing class hooks still apply; role, tabindex and the keydown handler make them focusable and operable with Enter or Space, which the bare onclick anchors were not. --> <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06738v1-abstract-full').style.display = 'inline'; document.getElementById('2411.06738v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="false" aria-controls="2411.06738v1-abstract-full">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.06738v1-abstract-full" style="display: none;"> Omnidirectional (360-degree) video is rapidly gaining popularity due to advancements in immersive technologies like virtual reality (VR) and extended reality (XR). However, real-time streaming of such videos, especially in live mobile scenarios like unmanned aerial vehicles (UAVs), is challenged by limited bandwidth and strict latency constraints. Traditional methods, such as compression and adaptive resolution, help but often compromise video quality and introduce artifacts that degrade the viewer experience. Additionally, the unique spherical geometry of 360-degree video presents challenges not encountered in traditional 2D video. To address these issues, we initiated the 360-degree Video Super Resolution and Quality Enhancement Challenge. This competition encourages participants to develop efficient machine learning solutions to enhance the quality of low-bitrate compressed 360-degree videos, with two tracks focusing on 2x and 4x super-resolution (SR). In this paper, we outline the challenge framework, detailing the two competition tracks and highlighting the SR solutions proposed by the top-performing models. We assess these models within a unified framework, considering quality enhancement, bitrate gain, and computational efficiency. 
This challenge aims to drive innovation in real-time 360-degree video streaming, improving the quality and accessibility of immersive visual experiences. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.06738v1-abstract-full').style.display = 'none'; document.getElementById('2411.06738v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="true" aria-controls="2411.06738v1-abstract-full">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 9 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05641">arXiv:2411.05641</a> <span> [<a href="https://arxiv.org/pdf/2411.05641">pdf</a>, <a href="https://arxiv.org/format/2411.05641">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Evaluating Large Language Model Capability in Vietnamese Fact-Checking Data Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=To%2C+L+T">Long Truong To</a>, <a href="/search/?searchtype=author&query=Le%2C+H+T">Hung Tuan Le</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+D+V">Dat Van-Thanh Nguyen</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+M+T">Manh Trong Nguyen</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+T+T">Tri Thien Nguyen</a>, <a href="/search/?searchtype=author&query=Van+Huynh%2C+T">Tin Van Huynh</a>, <a href="/search/?searchtype=author&query=Van+Nguyen%2C+K">Kiet Van Nguyen</a> </p> <p class="abstract 
mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05641v1-abstract-short" style="display: inline;"> Large Language Models (LLMs), with gradually improving reading comprehension and reasoning capabilities, are being applied to a range of complex language tasks, including the automatic generation of language data for various purposes. However, research on applying LLMs for automatic data generation in low-resource languages like Vietnamese is still underdeveloped and lacks comprehensive evaluation… <!-- Abstract toggle: "More" shows the "abstract-full" span and hides the "abstract-short" preview; "Less" does the reverse. Both controls stay <a> elements so their existing class hooks still apply; role, tabindex and the keydown handler make them focusable and operable with Enter or Space, which the bare onclick anchors were not. --> <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05641v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05641v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="false" aria-controls="2411.05641v1-abstract-full">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05641v1-abstract-full" style="display: none;"> Large Language Models (LLMs), with gradually improving reading comprehension and reasoning capabilities, are being applied to a range of complex language tasks, including the automatic generation of language data for various purposes. However, research on applying LLMs for automatic data generation in low-resource languages like Vietnamese is still underdeveloped and lacks comprehensive evaluation. In this paper, we explore the use of LLMs for automatic data generation for the Vietnamese fact-checking task, which faces significant data limitations. Specifically, we focus on fact-checking data where claims are synthesized from multiple evidence sentences to assess the information synthesis capabilities of LLMs. We develop an automatic data construction process using simple prompt techniques on LLMs and explore several methods to improve the quality of the generated data. To evaluate the quality of the data generated by LLMs, we conduct both manual quality assessments and performance evaluations using language models. 
Experimental results and manual evaluations illustrate that while the quality of the generated data has significantly improved through fine-tuning techniques, LLMs still cannot match the data quality produced by humans. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05641v1-abstract-full').style.display = 'none'; document.getElementById('2411.05641v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="true" aria-controls="2411.05641v1-abstract-full">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.05524">arXiv:2411.05524</a> <span> [<a href="https://arxiv.org/pdf/2411.05524">pdf</a>, <a href="https://arxiv.org/format/2411.05524">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> Alignment of 3D woodblock geometrical models and 2D orthographic projection image </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Nguyen%2C+M+D">Minh DUc Nguyen</a>, <a href="/search/?searchtype=author&query=Le%2C+C+T">Cong Thuong Le</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+T+L">Trong Lam Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.05524v1-abstract-short" style="display: inline;"> The accurate alignment of 3D woodblock geometrical models with 2D orthographic projection images presents a significant 
challenge in the digital preservation of Vietnamese cultural heritage. This paper proposes a unified image processing algorithm to address this issue, enhancing the registration quality between 3D woodblock models and their 2D representations. The method includes determining the… <!-- Abstract toggle: "More" shows the "abstract-full" span and hides the "abstract-short" preview; "Less" does the reverse. Both controls stay <a> elements so their existing class hooks still apply; role, tabindex and the keydown handler make them focusable and operable with Enter or Space, which the bare onclick anchors were not. --> <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05524v1-abstract-full').style.display = 'inline'; document.getElementById('2411.05524v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="false" aria-controls="2411.05524v1-abstract-full">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.05524v1-abstract-full" style="display: none;"> The accurate alignment of 3D woodblock geometrical models with 2D orthographic projection images presents a significant challenge in the digital preservation of Vietnamese cultural heritage. This paper proposes a unified image processing algorithm to address this issue, enhancing the registration quality between 3D woodblock models and their 2D representations. The method includes determining the plane of the 3D character model, establishing a transformation matrix to align this plane with the 2D printed image plane, and creating a parallel-projected depth map for precise alignment. This process minimizes disocclusions and ensures that character shapes and strokes are correctly positioned. Experimental results highlight the importance of structure-based comparisons to optimize alignment for large-scale Han-Nom character datasets. The proposed approach, combining density-based and structure-based methods, demonstrates improved registration performance, offering an effective normalization scheme for digital heritage preservation. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.05524v1-abstract-full').style.display = 'none'; document.getElementById('2411.05524v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="true" aria-controls="2411.05524v1-abstract-full">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04756">arXiv:2411.04756</a> <span> [<a href="https://arxiv.org/pdf/2411.04756">pdf</a>, <a href="https://arxiv.org/format/2411.04756">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> A study of Vietnamese readability assessing through semantic and statistical features </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Le%2C+H+T">Hung Tuan Le</a>, <a href="/search/?searchtype=author&query=To%2C+L+T">Long Truong To</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+M+T">Manh Trong Nguyen</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+Q">Quyen Nguyen</a>, <a href="/search/?searchtype=author&query=Do%2C+T">Trong-Hop Do</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04756v1-abstract-short" style="display: inline;"> Determining the difficulty of a text involves assessing various textual features that may impact the reader's text comprehension, yet current research in Vietnamese has only focused on statistical features. 
This paper introduces a new approach that integrates statistical and semantic approaches to assessing text readability. Our research utilized three distinct datasets: the Vietnamese Text Readab… <!-- Abstract toggle: "More" shows the "abstract-full" span and hides the "abstract-short" preview; "Less" does the reverse. Both controls stay <a> elements so their existing class hooks still apply; role, tabindex and the keydown handler make them focusable and operable with Enter or Space, which the bare onclick anchors were not. --> <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04756v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04756v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="false" aria-controls="2411.04756v1-abstract-full">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04756v1-abstract-full" style="display: none;"> Determining the difficulty of a text involves assessing various textual features that may impact the reader's text comprehension, yet current research in Vietnamese has only focused on statistical features. This paper introduces a new approach that integrates statistical and semantic approaches to assessing text readability. Our research utilized three distinct datasets: the Vietnamese Text Readability Dataset (ViRead), OneStopEnglish, and RACE, with the latter two translated into Vietnamese. Advanced semantic analysis methods were employed for the semantic aspect using state-of-the-art language models such as PhoBERT, ViDeBERTa, and ViBERT. In addition, statistical methods were incorporated to extract syntactic and lexical features of the text. We conducted experiments using various machine learning models, including Support Vector Machine (SVM), Random Forest, and Extra Trees and evaluated their performance using accuracy and F1 score metrics. Our results indicate that a joint approach that combines semantic and statistical features significantly enhances the accuracy of readability classification compared to using each method in isolation. The current study emphasizes the importance of considering both statistical and semantic aspects for a more accurate assessment of text difficulty in Vietnamese. 
This contribution to the field provides insights into the adaptability of advanced language models in the context of Vietnamese text readability. It lays the groundwork for future research in this area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04756v1-abstract-full').style.display = 'none'; document.getElementById('2411.04756v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="true" aria-controls="2411.04756v1-abstract-full">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.04471">arXiv:2411.04471</a> <span> [<a href="https://arxiv.org/pdf/2411.04471">pdf</a>, <a href="https://arxiv.org/format/2411.04471">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantum Physics">quant-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> FQsun: A Configurable Wave Function-Based Quantum Emulator for Power-Efficient Quantum Simulations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Vu%2C+T+H">Tuan Hai Vu</a>, <a href="/search/?searchtype=author&query=Le%2C+V+T+D">Vu Trung Duong Le</a>, <a href="/search/?searchtype=author&query=Pham%2C+H+L">Hoai Luan Pham</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+Q+C">Quoc Chuong Nguyen</a>, <a href="/search/?searchtype=author&query=Nakashima%2C+Y">Yasuhiko Nakashima</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.04471v1-abstract-short" 
style="display: inline;"> Quantum computing has emerged as a powerful tool for solving complex computational problems, but access to real quantum hardware remains limited due to high costs and increasing demand for efficient quantum simulations. Unfortunately, software simulators on CPUs/GPUs such as Qiskit, ProjectQ, and Qsun offer flexibility and support for a large number of qubits, they struggle with high power consump… <!-- Abstract toggle: "More" shows the "abstract-full" span and hides the "abstract-short" preview; "Less" does the reverse. Both controls stay <a> elements so their existing class hooks still apply; role, tabindex and the keydown handler make them focusable and operable with Enter or Space, which the bare onclick anchors were not. --> <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04471v1-abstract-full').style.display = 'inline'; document.getElementById('2411.04471v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="false" aria-controls="2411.04471v1-abstract-full">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.04471v1-abstract-full" style="display: none;"> Quantum computing has emerged as a powerful tool for solving complex computational problems, but access to real quantum hardware remains limited due to high costs and increasing demand for efficient quantum simulations. Unfortunately, software simulators on CPUs/GPUs such as Qiskit, ProjectQ, and Qsun offer flexibility and support for a large number of qubits, they struggle with high power consumption and limited processing speed, especially as qubit counts scale. Accordingly, quantum emulators implemented on dedicated hardware, such as FPGAs and analog circuits, offer a promising path for addressing energy efficiency concerns. However, existing studies on hardware-based emulators still face challenges in terms of limited flexibility, lack of fidelity evaluation, and power consumption. To overcome these gaps, we propose FQsun, a quantum emulator that enhances performance by integrating four key innovations: efficient memory organization, a configurable Quantum Gate Unit (QGU), optimized scheduling, and multiple number precisions. 
Five FQsun versions with different number precisions, including 16-bit floating point, 32-bit floating point, 16-bit fixed point, 24-bit fixed point, and 32-bit fixed point, are implemented on the Xilinx ZCU102 FPGA, utilizing between 9,226 and 18,093 LUTs, 1,440 and 7,031 FFs, 344 and 464 BRAMs, and 14 and 88 DSPs and consuming a maximum power of 2.41W. Experimental results demonstrate high accuracy in normalized gate speed, fidelity, and mean square error, particularly with 32-bit fixed-point and floating-point versions, establishing FQsun's capability as a precise quantum emulator. Benchmarking on quantum algorithms such as Quantum Fourier Transform, Parameter-Shift Rule, and Random Quantum Circuits reveals that FQsun achieves superior power-delay product, outperforming traditional software simulators on powerful CPUs by up to 9,870 times. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.04471v1-abstract-full').style.display = 'none'; document.getElementById('2411.04471v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="true" aria-controls="2411.04471v1-abstract-full">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 14 figures, submitted to the IEEE Transaction on Quantum Engineering</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.03724">arXiv:2411.03724</a> <span> [<a href="https://arxiv.org/pdf/2411.03724">pdf</a>, <a href="https://arxiv.org/format/2411.03724">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Estimation of Psychosocial Work Environment Exposures Through Video Object Detection. Proof of Concept Using CCTV Footage </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Hansen%2C+C+D">Claus D. Hansen</a>, <a href="/search/?searchtype=author&query=Le%2C+T+H">Thuy Hai Le</a>, <a href="/search/?searchtype=author&query=Campos%2C+D">David Campos</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.03724v1-abstract-short" style="display: inline;"> This paper examines the use of computer vision algorithms to estimate aspects of the psychosocial work environment using CCTV footage. We present a proof of concept for a methodology that detects and tracks people in video footage and estimates interactions between customers and employees by estimating their poses and calculating the duration of their encounters. 
We propose a pipeline that combine… <!-- Abstract toggle: "More" shows the "abstract-full" span and hides the "abstract-short" preview; "Less" does the reverse. Both controls stay <a> elements so their existing class hooks still apply; role, tabindex and the keydown handler make them focusable and operable with Enter or Space, which the bare onclick anchors were not. --> <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03724v1-abstract-full').style.display = 'inline'; document.getElementById('2411.03724v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="false" aria-controls="2411.03724v1-abstract-full">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.03724v1-abstract-full" style="display: none;"> This paper examines the use of computer vision algorithms to estimate aspects of the psychosocial work environment using CCTV footage. We present a proof of concept for a methodology that detects and tracks people in video footage and estimates interactions between customers and employees by estimating their poses and calculating the duration of their encounters. We propose a pipeline that combines existing object detection and tracking algorithms (YOLOv8 and DeepSORT) with pose estimation algorithms (BlazePose) to estimate the number of customers and employees in the footage as well as the duration of their encounters. We use a simple rule-based approach to classify the interactions as positive, neutral or negative based on three different criteria: distance, duration and pose. The proposed methodology is tested on a small dataset of CCTV footage. While the data is quite limited in particular with respect to the quality of the footage, we have chosen this case as it represents a typical setting where the method could be applied. The results show that the object detection and tracking part of the pipeline has a reasonable performance on the dataset with a high degree of recall and reasonable accuracy. At this stage, the pose estimation is still limited to fully detect the type of interactions due to difficulties in tracking employees in the footage. We conclude that the method is a promising alternative to self-reported measures of the psychosocial work environment and could be used in future studies to obtain external observations of the work environment. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.03724v1-abstract-full').style.display = 'none'; document.getElementById('2411.03724v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="true" aria-controls="2411.03724v1-abstract-full">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 9 figures, presented at IWOAR 9th International Workshop on Sensor-Based Activity Recognition and Artificial Intelligence, September 26-27, Potsdam, Germany</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.02857">arXiv:2411.02857</a> <span> [<a href="https://arxiv.org/pdf/2411.02857">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Multi-Scale Temporal Analysis for Failure Prediction in Energy Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Le%2C+A">Anh Le</a>, <a href="/search/?searchtype=author&query=Huynh%2C+P+K">Phat K. Huynh</a>, <a href="/search/?searchtype=author&query=Yadav%2C+O+P">Om P. Yadav</a>, <a href="/search/?searchtype=author&query=Le%2C+C">Chau Le</a>, <a href="/search/?searchtype=author&query=Pirim%2C+H">Harun Pirim</a>, <a href="/search/?searchtype=author&query=Le%2C+T+Q">Trung Q. 
Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.02857v1-abstract-short" style="display: inline;"> Many existing models struggle to predict nonlinear behavior during extreme weather conditions. This study proposes a multi-scale temporal analysis for failure prediction in energy systems using PMU data. The model integrates multi-scale analysis with machine learning to capture both short-term and long-term behavior. PMU data lacks labeled states despite logged failure records, making it difficult… <!-- Abstract toggle: "More" shows the "abstract-full" span and hides the "abstract-short" preview; "Less" does the reverse. Both controls stay <a> elements so their existing class hooks still apply; role, tabindex and the keydown handler make them focusable and operable with Enter or Space, which the bare onclick anchors were not. --> <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02857v1-abstract-full').style.display = 'inline'; document.getElementById('2411.02857v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="false" aria-controls="2411.02857v1-abstract-full">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.02857v1-abstract-full" style="display: none;"> Many existing models struggle to predict nonlinear behavior during extreme weather conditions. This study proposes a multi-scale temporal analysis for failure prediction in energy systems using PMU data. The model integrates multi-scale analysis with machine learning to capture both short-term and long-term behavior. PMU data lacks labeled states despite logged failure records, making it difficult to distinguish between normal and disturbance conditions. We address this through: (1) Extracting domain features from PMU time series data; (2) Applying multi-scale windows (30s, 60s, 180s) for pattern detection; (3) Using Recursive Feature Elimination to identify key features; (4) Training multiple machine learning models. Key contributions: Identifying significant features across multi-scale windows; Demonstrating LightGBM's superior performance (0.896 precision); Showing multi-scale analysis outperforms single-window models (0.841). 
Our work focuses on weather-related failures, with plans to extend to equipment failure and lightning events. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.02857v1-abstract-full').style.display = 'none'; document.getElementById('2411.02857v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="true" aria-controls="2411.02857v1-abstract-full">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 figures, RAMS 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00425">arXiv:2411.00425</a> <span> [<a href="https://arxiv.org/pdf/2411.00425">pdf</a>, <a href="https://arxiv.org/format/2411.00425">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Cityscape-Adverse: Benchmarking Robustness of Semantic Segmentation with Realistic Scene Modifications via Diffusion-Based Image Editing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Suryanto%2C+N">Naufal Suryanto</a>, <a href="/search/?searchtype=author&query=Adiputra%2C+A+A">Andro Aprila Adiputra</a>, <a href="/search/?searchtype=author&query=Kadiptya%2C+A+Y">Ahmada Yusril Kadiptya</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Thi-Thu-Huong Le</a>, <a href="/search/?searchtype=author&query=Pratama%2C+D">Derry Pratama</a>, <a href="/search/?searchtype=author&query=Kim%2C+Y">Yongsu Kim</a>, <a href="/search/?searchtype=author&query=Kim%2C+H">Howon Kim</a> 
</p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00425v1-abstract-short" style="display: inline;"> Recent advancements in generative AI, particularly diffusion-based image editing, have enabled the transformation of images into highly realistic scenes using only text instructions. This technology offers significant potential for generating diverse synthetic datasets to evaluate model robustness. In this paper, we introduce Cityscape-Adverse, a benchmark that employs diffusion-based image editin… <!-- Abstract toggle: "More" shows the "abstract-full" span and hides the "abstract-short" preview; "Less" does the reverse. Both controls stay <a> elements so their existing class hooks still apply; role, tabindex and the keydown handler make them focusable and operable with Enter or Space, which the bare onclick anchors were not. --> <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00425v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00425v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="false" aria-controls="2411.00425v1-abstract-full">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00425v1-abstract-full" style="display: none;"> Recent advancements in generative AI, particularly diffusion-based image editing, have enabled the transformation of images into highly realistic scenes using only text instructions. This technology offers significant potential for generating diverse synthetic datasets to evaluate model robustness. In this paper, we introduce Cityscape-Adverse, a benchmark that employs diffusion-based image editing to simulate eight adverse conditions, including variations in weather, lighting, and seasons, while preserving the original semantic labels. We evaluate the reliability of diffusion-based models in generating realistic scene modifications and assess the performance of state-of-the-art CNN and Transformer-based semantic segmentation models under these challenging conditions. Additionally, we analyze which modifications have the greatest impact on model performance and explore how training on synthetic datasets can improve robustness in real-world adverse scenarios. 
Our results demonstrate that all tested models, particularly CNN-based architectures, experienced significant performance degradation under extreme conditions, while Transformer-based models exhibited greater resilience. We verify that models trained on Cityscape-Adverse show significantly enhanced resilience when applied to unseen domains. Code and datasets will be released at https://github.com/naufalso/cityscape-adverse. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00425v1-abstract-full').style.display = 'none'; document.getElementById('2411.00425v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="true" aria-controls="2411.00425v1-abstract-full">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">19 pages, under review, code and dataset will be available at https://github.com/naufalso/cityscape-adverse</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.00209">arXiv:2411.00209</a> <span> [<a href="https://arxiv.org/pdf/2411.00209">pdf</a>, <a href="https://arxiv.org/format/2411.00209">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Semantic Knowledge Distillation for Onboard Satellite Earth Observation Image Classification </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Le%2C+T">Thanh-Dung Le</a>, <a href="/search/?searchtype=author&query=Ha%2C+V+N">Vu Nguyen Ha</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+T+T">Ti Ti Nguyen</a>, <a href="/search/?searchtype=author&query=Eappen%2C+G">Geoffrey Eappen</a>, <a href="/search/?searchtype=author&query=Thiruvasagam%2C+P">Prabhu Thiruvasagam</a>, <a href="/search/?searchtype=author&query=Chou%2C+H">Hong-fu Chou</a>, <a href="/search/?searchtype=author&query=Tran%2C+D">Duc-Dung Tran</a>, <a href="/search/?searchtype=author&query=Garces-Socarras%2C+L+M">Luis M. Garces-Socarras</a>, <a href="/search/?searchtype=author&query=Gonzalez-Rios%2C+J+L">Jorge L. Gonzalez-Rios</a>, <a href="/search/?searchtype=author&query=Merlano-Duncan%2C+J+C">Juan Carlos Merlano-Duncan</a>, <a href="/search/?searchtype=author&query=Chatzinotas%2C+S">Symeon Chatzinotas</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.00209v1-abstract-short" style="display: inline;"> This study presents an innovative dynamic weighting knowledge distillation (KD) framework tailored for efficient Earth observation (EO) image classification (IC) in resource-constrained settings. 
Utilizing EfficientViT and MobileViT as teacher models, this framework enables lightweight student models, particularly ResNet8 and ResNet16, to surpass 90% in accuracy, precision, and recall, adhering to… <!-- Abstract toggle: "More" shows the "abstract-full" span and hides the "abstract-short" preview; "Less" does the reverse. Both controls stay <a> elements so their existing class hooks still apply; role, tabindex and the keydown handler make them focusable and operable with Enter or Space, which the bare onclick anchors were not. --> <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00209v1-abstract-full').style.display = 'inline'; document.getElementById('2411.00209v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="false" aria-controls="2411.00209v1-abstract-full">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.00209v1-abstract-full" style="display: none;"> This study presents an innovative dynamic weighting knowledge distillation (KD) framework tailored for efficient Earth observation (EO) image classification (IC) in resource-constrained settings. Utilizing EfficientViT and MobileViT as teacher models, this framework enables lightweight student models, particularly ResNet8 and ResNet16, to surpass 90% in accuracy, precision, and recall, adhering to the stringent confidence thresholds necessary for reliable classification tasks. Unlike conventional KD methods that rely on static weight distribution, our adaptive weighting mechanism responds to each teacher model's confidence, allowing student models to prioritize more credible sources of knowledge dynamically. Remarkably, ResNet8 delivers substantial efficiency gains, achieving a 97.5% reduction in parameters, a 96.7% decrease in FLOPs, an 86.2% cut in power consumption, and a 63.5% increase in inference speed over MobileViT. This significant optimization of complexity and resource demands establishes ResNet8 as an optimal candidate for EO tasks, combining robust performance with feasibility in deployment. The confidence-based, adaptable KD approach underscores the potential of dynamic distillation strategies to yield high-performing, resource-efficient models tailored for satellite-based EO applications. The reproducible code is accessible on our GitHub repository. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.00209v1-abstract-full').style.display = 'none'; document.getElementById('2411.00209v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }" role="button" tabindex="0" aria-expanded="true" aria-controls="2411.00209v1-abstract-full">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under revisions</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.23227">arXiv:2410.23227</a> <span> [<a href="https://arxiv.org/pdf/2410.23227">pdf</a>, <a href="https://arxiv.org/format/2410.23227">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> (FL)$^2$: Overcoming Few Labels in Federated Semi-Supervised Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Lee%2C+S">Seungjoo Lee</a>, <a href="/search/?searchtype=author&query=Le%2C+T+V">Thanh-Long V. Le</a>, <a href="/search/?searchtype=author&query=Shin%2C+J">Jaemin Shin</a>, <a href="/search/?searchtype=author&query=Lee%2C+S">Sung-Ju Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.23227v2-abstract-short" style="display: inline;"> Federated Learning (FL) is a distributed machine learning framework that trains accurate global models while preserving clients' privacy-sensitive data. 
However, most FL approaches assume that clients possess labeled data, which is often not the case in practice. Federated Semi-Supervised Learning (FSSL) addresses this label deficiency problem, targeting situations where only the server has a smal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23227v2-abstract-full').style.display = 'inline'; document.getElementById('2410.23227v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.23227v2-abstract-full" style="display: none;"> Federated Learning (FL) is a distributed machine learning framework that trains accurate global models while preserving clients' privacy-sensitive data. However, most FL approaches assume that clients possess labeled data, which is often not the case in practice. Federated Semi-Supervised Learning (FSSL) addresses this label deficiency problem, targeting situations where only the server has a small amount of labeled data while clients do not. However, a significant performance gap exists between Centralized Semi-Supervised Learning (SSL) and FSSL. This gap arises from confirmation bias, which is more pronounced in FSSL due to multiple local training epochs and the separation of labeled and unlabeled data. We propose $(FL)^2$, a robust training method for unlabeled clients using sharpness-aware consistency regularization. We show that regularizing the original pseudo-labeling loss is suboptimal, and hence we carefully select unlabeled samples for regularization. We further introduce client-specific adaptive thresholding and learning status-aware aggregation to adjust the training process based on the learning progress of each client. Our experiments on three benchmark datasets demonstrate that our approach significantly improves performance and bridges the gap with SSL, particularly in scenarios with scarce labeled data. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.23227v2-abstract-full').style.display = 'none'; document.getElementById('2410.23227v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21916">arXiv:2410.21916</a> <span> [<a href="https://arxiv.org/pdf/2410.21916">pdf</a>, <a href="https://arxiv.org/format/2410.21916">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Cognitive Semantic Augmentation LEO Satellite Networks for Earth Observation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chou%2C+H">Hong-fu Chou</a>, <a href="/search/?searchtype=author&query=Ha%2C+V+N">Vu Nguyen Ha</a>, <a href="/search/?searchtype=author&query=Thiruvasagam%2C+P">Prabhu Thiruvasagam</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Thanh-Dung Le</a>, <a href="/search/?searchtype=author&query=Eappen%2C+G">Geoffrey Eappen</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+T+T">Ti Ti Nguyen</a>, <a href="/search/?searchtype=author&query=Tran%2C+D+D">Duc Dung Tran</a>, <a href="/search/?searchtype=author&query=Garces-Socarras%2C+L+M">Luis M. 
Garces-Socarras</a>, <a href="/search/?searchtype=author&query=Merlano-Duncan%2C+J+C">Juan Carlos Merlano-Duncan</a>, <a href="/search/?searchtype=author&query=Chatzinotas%2C+S">Symeon Chatzinotas</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21916v1-abstract-short" style="display: inline;"> Earth observation (EO) systems are essential for mapping, catastrophe monitoring, and resource management, but they have trouble processing and sending large amounts of EO data efficiently, especially for specialized applications like agriculture and real-time disaster response. This paper presents a novel framework for semantic communication in EO satellite networks, aimed at enhancing data trans… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21916v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21916v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21916v1-abstract-full" style="display: none;"> Earth observation (EO) systems are essential for mapping, catastrophe monitoring, and resource management, but they have trouble processing and sending large amounts of EO data efficiently, especially for specialized applications like agriculture and real-time disaster response. This paper presents a novel framework for semantic communication in EO satellite networks, aimed at enhancing data transmission efficiency and system performance through cognitive processing techniques. 
The proposed system leverages Discrete Task-Oriented Joint Source-Channel Coding (DT-JSCC) and Semantic Data Augmentation (SA) integrate cognitive semantic processing with inter-satellite links, enabling efficient analysis and transmission of multispectral imagery for improved object detection, pattern recognition, and real-time decision-making. Cognitive Semantic Augmentation (CSA) is introduced to enhance a system's capability to process and transmit semantic information, improving feature prioritization, consistency, and adaptation to changing communication and application needs. The end-to-end architecture is designed for next-generation satellite networks, such as those supporting 6G, demonstrating significant improvements in fewer communication rounds and better accuracy over federated learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21916v1-abstract-full').style.display = 'none'; document.getElementById('2410.21916v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 Pages, 5 figures, Magazine. 
arXiv admin note: substantial text overlap with arXiv:2409.15246</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.21838">arXiv:2410.21838</a> <span> [<a href="https://arxiv.org/pdf/2410.21838">pdf</a>, <a href="https://arxiv.org/format/2410.21838">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> </div> </div> <p class="title is-5 mathjax"> Fractal structure, depinning, and hysteresis of dislocations in high-entropy alloys </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Le%2C+H+T">Hoa Thi Le</a>, <a href="/search/?searchtype=author&query=N%C3%B6hring%2C+W+G">Wolfram G. Nöhring</a>, <a href="/search/?searchtype=author&query=Pastewka%2C+L">Lars Pastewka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.21838v1-abstract-short" style="display: inline;"> High-entropy alloys (HEAs) are complex alloys containing multiple elements in high concentrations. Plasticity in HEAs is carried by dislocations, but the random nature of their composition pins dislocations, effectively hindering their motion. 
We investigate the resulting complex structure of the dislocation in terms of spatial correlation functions, which allow us to draw conclusions on the fract… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21838v1-abstract-full').style.display = 'inline'; document.getElementById('2410.21838v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.21838v1-abstract-full" style="display: none;"> High-entropy alloys (HEAs) are complex alloys containing multiple elements in high concentrations. Plasticity in HEAs is carried by dislocations, but the random nature of their composition pins dislocations, effectively hindering their motion. We investigate the resulting complex structure of the dislocation in terms of spatial correlation functions, which allow us to draw conclusions on the fractal geometry of the dislocation. At high temperature, where thermal fluctuations dominate, dislocations adopt the structure of a random walk with Hurst exponent $1/2$ or fractal dimension $3/2$. At low temperature we find larger Hurst exponents (lower dimensions), with a crossover to an uncorrelated structure beyond a correlation length. These changes in structure are accompanied by an emergence of hysteresis (and hence pinning) in the motion of the dislocation at low temperature. We use a modified Labusch/Edwards-Wilkinson-model to argue that this correlation length must be an intrinsic property of the HEA. This means dislocations in HEAs are an individual pinning limit, where segments of the dislocation are independently pinned by local distortions of the crystal lattice that are induced by chemical heterogeneity. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.21838v1-abstract-full').style.display = 'none'; document.getElementById('2410.21838v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.20471">arXiv:2410.20471</a> <span> [<a href="https://arxiv.org/pdf/2410.20471">pdf</a>, <a href="https://arxiv.org/ps/2410.20471">ps</a>, <a href="https://arxiv.org/format/2410.20471">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Data Structures and Algorithms">cs.DS</span> </div> </div> <p class="title is-5 mathjax"> Improved Online Reachability Preservers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Bodwin%2C+G">Greg Bodwin</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Tuong Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.20471v1-abstract-short" style="display: inline;"> A reachability preserver is a basic kind of graph sparsifier, which preserves the reachability relation of an $n$-node directed input graph $G$ among a set of given demand pairs $P$ of size $|P|=p$. 
We give constructions of sparse reachability preservers in the online setting, where $G$ is given on input, the demand pairs $(s, t) \in P$ arrive one at a time, and we must irrevocably add edges to a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20471v1-abstract-full').style.display = 'inline'; document.getElementById('2410.20471v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.20471v1-abstract-full" style="display: none;"> A reachability preserver is a basic kind of graph sparsifier, which preserves the reachability relation of an $n$-node directed input graph $G$ among a set of given demand pairs $P$ of size $|P|=p$. We give constructions of sparse reachability preservers in the online setting, where $G$ is given on input, the demand pairs $(s, t) \in P$ arrive one at a time, and we must irrevocably add edges to a preserver $H$ to ensure reachability for the pair $(s, t)$ before we can see the next demand pair. Our main results are: -- There is a construction that guarantees a maximum preserver size of $$|E(H)| \le O\left( n^{0.72}p^{0.56} + n^{0.6}p^{0.7} + n\right).$$ This improves polynomially on the previous online upper bound of $O( \min\{np^{0.5}, n^{0.5}p\}) + n$, implicit in the work of Coppersmith and Elkin [SODA '05]. -- Given a promise that the demand pairs will satisfy $P \subseteq S \times V$ for some vertex set $S$ of size $|S|=:σ$, there is a construction that guarantees a maximum preserver size of $$|E(H)| \le O\left( (npσ)^{1/2} + n\right).$$ A slightly different construction gives the same result for the setting $P \subseteq V \times S$. This improves polynomially on the previous online upper bound of $O( σn)$ (folklore). All of these constructions are polynomial time, deterministic, and they do not require knowledge of the values of $p, σ$, or $S$. 
Our techniques also give a small polynomial improvement in the current upper bounds for offline reachability preservers, and they extend to a stronger model in which we must commit to a path for all possible reachable pairs in $G$ before any demand pairs have been received. As an application, we improve the competitive ratio for Online Unweighted Directed Steiner Forest to $O(n^{3/5 + \varepsilon})$. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.20471v1-abstract-full').style.display = 'none'; document.getElementById('2410.20471v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">SODA 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.16597">arXiv:2410.16597</a> <span> [<a href="https://arxiv.org/pdf/2410.16597">pdf</a>, <a href="https://arxiv.org/format/2410.16597">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Distill-SynthKG: Distilling Knowledge Graph Synthesis Workflow for Improved Coverage and Efficiency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Choubey%2C+P+K">Prafulla Kumar Choubey</a>, <a href="/search/?searchtype=author&query=Su%2C+X">Xin Su</a>, <a href="/search/?searchtype=author&query=Luo%2C+M">Man 
Luo</a>, <a href="/search/?searchtype=author&query=Peng%2C+X">Xiangyu Peng</a>, <a href="/search/?searchtype=author&query=Xiong%2C+C">Caiming Xiong</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Tiep Le</a>, <a href="/search/?searchtype=author&query=Rosenman%2C+S">Shachar Rosenman</a>, <a href="/search/?searchtype=author&query=Lal%2C+V">Vasudev Lal</a>, <a href="/search/?searchtype=author&query=Mui%2C+P">Phil Mui</a>, <a href="/search/?searchtype=author&query=Ho%2C+R">Ricky Ho</a>, <a href="/search/?searchtype=author&query=Howard%2C+P">Phillip Howard</a>, <a href="/search/?searchtype=author&query=Wu%2C+C">Chien-Sheng Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.16597v1-abstract-short" style="display: inline;"> Knowledge graphs (KGs) generated by large language models (LLMs) are becoming increasingly valuable for Retrieval-Augmented Generation (RAG) applications that require knowledge-intensive reasoning. However, existing KG extraction methods predominantly rely on prompt-based approaches, which are inefficient for processing large-scale corpora. These approaches often suffer from information loss, part… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16597v1-abstract-full').style.display = 'inline'; document.getElementById('2410.16597v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.16597v1-abstract-full" style="display: none;"> Knowledge graphs (KGs) generated by large language models (LLMs) are becoming increasingly valuable for Retrieval-Augmented Generation (RAG) applications that require knowledge-intensive reasoning. However, existing KG extraction methods predominantly rely on prompt-based approaches, which are inefficient for processing large-scale corpora. 
These approaches often suffer from information loss, particularly with long documents, due to the lack of specialized design for KG construction. Additionally, there is a gap in evaluation datasets and methodologies for ontology-free KG construction. To overcome these limitations, we propose SynthKG, a multi-step, document-level ontology-free KG synthesis workflow based on LLMs. By fine-tuning a smaller LLM on the synthesized document-KG pairs, we streamline the multi-step process into a single-step KG generation approach called Distill-SynthKG, substantially reducing the number of LLM inference calls. Furthermore, we re-purpose existing question-answering datasets to establish KG evaluation datasets and introduce new evaluation metrics. Using KGs produced by Distill-SynthKG, we also design a novel graph-based retrieval framework for RAG. Experimental results demonstrate that Distill-SynthKG not only surpasses all baseline models in KG quality -- including models up to eight times larger -- but also consistently excels in retrieval and question-answering tasks. Our proposed graph retrieval framework also outperforms all KG-retrieval methods across multiple benchmark datasets. We release the SynthKG dataset and Distill-SynthKG model publicly to support further research and development. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.16597v1-abstract-full').style.display = 'none'; document.getElementById('2410.16597v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15648">arXiv:2410.15648</a> <span> [<a href="https://arxiv.org/pdf/2410.15648">pdf</a>, <a href="https://arxiv.org/format/2410.15648">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Methodology">stat.ME</span> </div> </div> <p class="title is-5 mathjax"> Linking Model Intervention to Causal Interpretation in Model Explanation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Cheng%2C+D">Debo Cheng</a>, <a href="/search/?searchtype=author&query=Xu%2C+Z">Ziqi Xu</a>, <a href="/search/?searchtype=author&query=Li%2C+J">Jiuyong Li</a>, <a href="/search/?searchtype=author&query=Liu%2C+L">Lin Liu</a>, <a href="/search/?searchtype=author&query=Yu%2C+K">Kui Yu</a>, <a href="/search/?searchtype=author&query=Le%2C+T+D">Thuc Duy Le</a>, <a href="/search/?searchtype=author&query=Liu%2C+J">Jixue Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15648v1-abstract-short" style="display: inline;"> Intervention intuition is often used in model explanation where the intervention effect of a feature on the outcome is quantified by the difference of a model prediction when the feature value is changed from the current value to the baseline value. Such a model intervention effect of a feature is inherently association. 
In this paper, we will study the conditions when an intuitive model intervent… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15648v1-abstract-full').style.display = 'inline'; document.getElementById('2410.15648v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15648v1-abstract-full" style="display: none;"> Intervention intuition is often used in model explanation where the intervention effect of a feature on the outcome is quantified by the difference of a model prediction when the feature value is changed from the current value to the baseline value. Such a model intervention effect of a feature is inherently association. In this paper, we will study the conditions when an intuitive model intervention effect has a causal interpretation, i.e., when it indicates whether a feature is a direct cause of the outcome. This work links the model intervention effect to the causal interpretation of a model. Such an interpretation capability is important since it indicates whether a machine learning model is trustworthy to domain experts. The conditions also reveal the limitations of using a model intervention effect for causal interpretation in an environment with unobserved features. Experiments on semi-synthetic datasets have been conducted to validate theorems and show the potential for using the model intervention effect for model interpretation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15648v1-abstract-full').style.display = 'none'; document.getElementById('2410.15648v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.15618">arXiv:2410.15618</a> <span> [<a href="https://arxiv.org/pdf/2410.15618">pdf</a>, <a href="https://arxiv.org/format/2410.15618">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Erasing Undesirable Concepts in Diffusion Models with Adversarial Preservation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Bui%2C+A">Anh Bui</a>, <a href="/search/?searchtype=author&query=Vuong%2C+L">Long Vuong</a>, <a href="/search/?searchtype=author&query=Doan%2C+K">Khanh Doan</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Trung Le</a>, <a href="/search/?searchtype=author&query=Montague%2C+P">Paul Montague</a>, <a href="/search/?searchtype=author&query=Abraham%2C+T">Tamas Abraham</a>, <a href="/search/?searchtype=author&query=Phung%2C+D">Dinh Phung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.15618v2-abstract-short" style="display: inline;"> Diffusion models excel at generating visually striking content from text but can inadvertently produce undesirable or harmful content when trained on unfiltered internet data. A practical solution is to selectively removing target concepts from the model, but this may impact the remaining concepts. 
Prior approaches have tried to balance this by introducing a loss term to preserve neutral content o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15618v2-abstract-full').style.display = 'inline'; document.getElementById('2410.15618v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.15618v2-abstract-full" style="display: none;"> Diffusion models excel at generating visually striking content from text but can inadvertently produce undesirable or harmful content when trained on unfiltered internet data. A practical solution is to selectively removing target concepts from the model, but this may impact the remaining concepts. Prior approaches have tried to balance this by introducing a loss term to preserve neutral content or a regularization term to minimize changes in the model parameters, yet resolving this trade-off remains challenging. In this work, we propose to identify and preserving concepts most affected by parameter changes, termed as \textit{adversarial concepts}. This approach ensures stable erasure with minimal impact on the other concepts. We demonstrate the effectiveness of our method using the Stable Diffusion model, showing that it outperforms state-of-the-art erasure methods in eliminating unwanted content while maintaining the integrity of other unrelated elements. Our code is available at \url{https://github.com/tuananhbui89/Erasing-Adversarial-Preservation}. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.15618v2-abstract-full').style.display = 'none'; document.getElementById('2410.15618v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.13895">arXiv:2410.13895</a> <span> [<a href="https://arxiv.org/pdf/2410.13895">pdf</a>, <a href="https://arxiv.org/format/2410.13895">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Nuclear Experiment">nucl-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Phenomenology">hep-ph</span> </div> </div> <p class="title is-5 mathjax"> First constraints on general neutrino interactions based on KATRIN data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Aker%2C+M">M. Aker</a>, <a href="/search/?searchtype=author&query=Batzler%2C+D">D. Batzler</a>, <a href="/search/?searchtype=author&query=Beglarian%2C+A">A. Beglarian</a>, <a href="/search/?searchtype=author&query=Beisenk%C3%B6tter%2C+J">J. Beisenkötter</a>, <a href="/search/?searchtype=author&query=Biassoni%2C+M">M. Biassoni</a>, <a href="/search/?searchtype=author&query=Bieringer%2C+B">B. Bieringer</a>, <a href="/search/?searchtype=author&query=Biondi%2C+Y">Y. Biondi</a>, <a href="/search/?searchtype=author&query=Block%2C+F">F. 
Block</a>, <a href="/search/?searchtype=author&query=Bornschein%2C+B">B. Bornschein</a>, <a href="/search/?searchtype=author&query=Bornschein%2C+L">L. Bornschein</a>, <a href="/search/?searchtype=author&query=B%C3%B6ttcher%2C+M">M. Böttcher</a>, <a href="/search/?searchtype=author&query=Carminati%2C+M">M. Carminati</a>, <a href="/search/?searchtype=author&query=Chatrabhuti%2C+A">A. Chatrabhuti</a>, <a href="/search/?searchtype=author&query=Chilingaryan%2C+S">S. Chilingaryan</a>, <a href="/search/?searchtype=author&query=Daniel%2C+B+A">B. A. Daniel</a>, <a href="/search/?searchtype=author&query=Descher%2C+M">M. Descher</a>, <a href="/search/?searchtype=author&query=Barrero%2C+D+D">D. Díaz Barrero</a>, <a href="/search/?searchtype=author&query=Doe%2C+P+J">P. J. Doe</a>, <a href="/search/?searchtype=author&query=Dragoun%2C+O">O. Dragoun</a>, <a href="/search/?searchtype=author&query=Drexlin%2C+G">G. Drexlin</a>, <a href="/search/?searchtype=author&query=Edzards%2C+F">F. Edzards</a>, <a href="/search/?searchtype=author&query=Eitel%2C+K">K. Eitel</a>, <a href="/search/?searchtype=author&query=Ellinger%2C+E">E. Ellinger</a>, <a href="/search/?searchtype=author&query=Engel%2C+R">R. Engel</a>, <a href="/search/?searchtype=author&query=Enomoto%2C+S">S. Enomoto</a> , et al. (108 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.13895v2-abstract-short" style="display: inline;"> The precision measurement of the tritium $β$-decay spectrum performed by the KATRIN experiment provides a unique way to search for general neutrino interactions (GNI). All theoretical allowed GNI terms involving neutrinos are incorporated into a low-energy effective field theory, and can be identified by specific signatures in the measured tritium $β$-spectrum. 
In this paper an effective descripti… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13895v2-abstract-full').style.display = 'inline'; document.getElementById('2410.13895v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.13895v2-abstract-full" style="display: none;"> The precision measurement of the tritium $β$-decay spectrum performed by the KATRIN experiment provides a unique way to search for general neutrino interactions (GNI). All theoretical allowed GNI terms involving neutrinos are incorporated into a low-energy effective field theory, and can be identified by specific signatures in the measured tritium $β$-spectrum. In this paper an effective description of the impact of GNI on the $β$-spectrum is formulated and the first constraints on the effective GNI parameters are derived based on the 4 million electrons collected in the second measurement campaign of KATRIN in 2019. In addition, constraints on selected types of interactions are investigated, thereby exploring the potential of KATRIN to search for more specific new physics cases, including a right-handed W boson, a charged Higgs or leptoquarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.13895v2-abstract-full').style.display = 'none'; document.getElementById('2410.13895v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.12027">arXiv:2410.12027</a> <span> [<a href="https://arxiv.org/pdf/2410.12027">pdf</a>, <a href="https://arxiv.org/format/2410.12027">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Fluid Dynamics">physics.flu-dyn</span> </div> </div> <p class="title is-5 mathjax"> Modal analysis of blood flows in saccular aneurysms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Nguyen%2C+T">Thien-Tam Nguyen</a>, <a href="/search/?searchtype=author&query=Kasperski%2C+D">Davina Kasperski</a>, <a href="/search/?searchtype=author&query=Huynh%2C+P+K">Phat Kim Huynh</a>, <a href="/search/?searchtype=author&query=Le%2C+T+Q">Trung Quoc Le</a>, <a href="/search/?searchtype=author&query=Le%2C+T+B">Trung Bao Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.12027v1-abstract-short" style="display: inline;"> Currently, it is challenging to investigate aneurismal hemodynamics based on current in-vivo data such as Magnetic Resonance Imaging or Computed Tomography due to the limitations in both spatial and temporal resolutions. In this work, we investigate the use of modal analysis at various resolutions to examine its usefulness for analyzing blood flows in brain aneurysms. 
Two variants of Dynamic Mode… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12027v1-abstract-full').style.display = 'inline'; document.getElementById('2410.12027v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.12027v1-abstract-full" style="display: none;"> Currently, it is challenging to investigate aneurismal hemodynamics based on current in-vivo data such as Magnetic Resonance Imaging or Computed Tomography due to the limitations in both spatial and temporal resolutions. In this work, we investigate the use of modal analysis at various resolutions to examine its usefulness for analyzing blood flows in brain aneurysms. Two variants of Dynamic Mode Decomposition (DMD): (i) Hankel-DMD; and (ii) Optimized-DMD, are used to extract the time-dependent dynamics of blood flows during one cardiac cycle. First, high-resolution hemodynamic data in patient-specific aneurysms are obtained using Computational Fluid Dynamics. Second, the dynamics modes, along with their spatial amplitudes and temporal magnitudes are calculated using the DMD analysis. Third, an examination of DMD analyses using a range of spatial and temporal resolutions of hemodynamic data to validate the applicability of DMD for low-resolution data, similar to ones in clinical practices. Our results show that DMD is able to characterize the inflow jet dynamics by separating large-scale structures and flow instabilities even at low spatial and temporal resolutions. Its robustness in quantifying the flow dynamics using the energy spectrum is demonstrated across different resolutions in all aneurysms in our study population. Our work indicates that DMD can be used for analyzing blood flow patterns of brain aneurysms and is a promising tool to be explored in in-vivo. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.12027v1-abstract-full').style.display = 'none'; document.getElementById('2410.12027v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11325">arXiv:2410.11325</a> <span> [<a href="https://arxiv.org/pdf/2410.11325">pdf</a>, <a href="https://arxiv.org/format/2410.11325">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Speculative Knowledge Distillation: Bridging the Teacher-Student Gap Through Interleaved Sampling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Xu%2C+W">Wenda Xu</a>, <a href="/search/?searchtype=author&query=Han%2C+R">Rujun Han</a>, <a href="/search/?searchtype=author&query=Wang%2C+Z">Zifeng Wang</a>, <a href="/search/?searchtype=author&query=Le%2C+L+T">Long T. 
Le</a>, <a href="/search/?searchtype=author&query=Madeka%2C+D">Dhruv Madeka</a>, <a href="/search/?searchtype=author&query=Li%2C+L">Lei Li</a>, <a href="/search/?searchtype=author&query=Wang%2C+W+Y">William Yang Wang</a>, <a href="/search/?searchtype=author&query=Agarwal%2C+R">Rishabh Agarwal</a>, <a href="/search/?searchtype=author&query=Lee%2C+C">Chen-Yu Lee</a>, <a href="/search/?searchtype=author&query=Pfister%2C+T">Tomas Pfister</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11325v1-abstract-short" style="display: inline;"> Recent advances in knowledge distillation (KD) have enabled smaller student models to approach the performance of larger teacher models. However, popular methods such as supervised KD and on-policy KD, are adversely impacted by the knowledge gaps between teacher-student in practical scenarios. Supervised KD suffers from a distribution mismatch between training with a static dataset and inference o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11325v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11325v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11325v1-abstract-full" style="display: none;"> Recent advances in knowledge distillation (KD) have enabled smaller student models to approach the performance of larger teacher models. However, popular methods such as supervised KD and on-policy KD, are adversely impacted by the knowledge gaps between teacher-student in practical scenarios. Supervised KD suffers from a distribution mismatch between training with a static dataset and inference over final student-generated outputs. 
Conversely, on-policy KD, which uses student-generated samples for training, can suffer from low-quality training examples with which teacher models are not familiar, resulting in inaccurate teacher feedback. To address these limitations, we introduce Speculative Knowledge Distillation (SKD), a novel approach that leverages cooperation between student and teacher models to generate high-quality training data on-the-fly while aligning with the student's inference-time distribution. In SKD, the student proposes tokens, and the teacher replaces poorly ranked ones based on its own distribution, transferring high-quality knowledge adaptively. We evaluate SKD on various text generation tasks, including translation, summarization, math, and instruction following, and show that SKD consistently outperforms existing KD methods across different domains, data sizes, and model initialization strategies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11325v1-abstract-full').style.display = 'none'; document.getElementById('2410.11325v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.11146">arXiv:2410.11146</a> <span> [<a href="https://arxiv.org/pdf/2410.11146">pdf</a>, <a href="https://arxiv.org/format/2410.11146">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Theoretical Analysis of the Efficient-Memory Matrix Storage Method for Quantum Emulation Accelerators with Gate Fusion on FPGAs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Le%2C+T+X+H">Tran Xuan Hieu Le</a>, <a href="/search/?searchtype=author&query=Pham%2C+H+L">Hoai Luan Pham</a>, <a href="/search/?searchtype=author&query=Vu%2C+T+H">Tuan Hai Vu</a>, <a href="/search/?searchtype=author&query=Le%2C+V+T+D">Vu Trung Duong Le</a>, <a href="/search/?searchtype=author&query=Yasuhiko%2C+N">Nakashima Yasuhiko</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.11146v1-abstract-short" style="display: inline;"> Quantum emulators play an important role in the development and testing of quantum algorithms, especially given the limitations of the current FTQC era. Developing high-speed, memory-optimized quantum emulators is a growing research trend, with gate fusion being a promising technique. 
However, existing gate fusion implementations often struggle to efficiently support large-scale quantum systems wi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11146v1-abstract-full').style.display = 'inline'; document.getElementById('2410.11146v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.11146v1-abstract-full" style="display: none;"> Quantum emulators play an important role in the development and testing of quantum algorithms, especially given the limitations of the current FTQC era. Developing high-speed, memory-optimized quantum emulators is a growing research trend, with gate fusion being a promising technique. However, existing gate fusion implementations often struggle to efficiently support large-scale quantum systems with a high number of qubits due to a lack of optimizations for the exponential growth in memory requirements. Therefore, this study proposes the EMMS (Efficient-Memory Matrix Storage) method for storing quantum operators and states, along with an EMMS-based Quantum Emulator Accelerator (QEA) architecture that incorporates multiple processing elements (PEs) to accelerate tensor product and matrix multiplication computations in quantum emulation with gate fusion. The theoretical analysis of the QEA on the Xilinx ZCU102 FPGA, using varying numbers of PEs and different depths of unitary and local data memory, reveals a linear increase in memory depth with the number of qubits. This scaling highlights the potential of the EMMS-based QEA to accommodate larger quantum circuits, providing insights into selecting appropriate memory sizes and FPGA devices. 
Furthermore, the estimated performance of the QEA with PE counts ranging from $2^2$ to $2^5$ on the Xilinx ZCU102 FPGA demonstrates that increasing the number of PEs significantly reduces the computation cycle count for circuits with fewer than 18 qubits, making it significantly faster than previous works. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.11146v1-abstract-full').style.display = 'none'; document.getElementById('2410.11146v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.08905">arXiv:2410.08905</a> <span> [<a href="https://arxiv.org/pdf/2410.08905">pdf</a>, <a href="https://arxiv.org/format/2410.08905">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Lifelong Event Detection via Optimal Transport </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Dao%2C+V">Viet Dao</a>, <a href="/search/?searchtype=author&query=Pham%2C+V">Van-Cuong Pham</a>, <a href="/search/?searchtype=author&query=Tran%2C+Q">Quyen Tran</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Thanh-Thien Le</a>, <a href="/search/?searchtype=author&query=Van%2C+L+N">Linh Ngo Van</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+T+H">Thien Huu Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.08905v1-abstract-short" 
style="display: inline;"> Continual Event Detection (CED) poses a formidable challenge due to the catastrophic forgetting phenomenon, where learning new tasks (with new coming event types) hampers performance on previous ones. In this paper, we introduce a novel approach, Lifelong Event Detection via Optimal Transport (LEDOT), that leverages optimal transport principles to align the optimization of our classification modul… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08905v1-abstract-full').style.display = 'inline'; document.getElementById('2410.08905v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.08905v1-abstract-full" style="display: none;"> Continual Event Detection (CED) poses a formidable challenge due to the catastrophic forgetting phenomenon, where learning new tasks (with new coming event types) hampers performance on previous ones. In this paper, we introduce a novel approach, Lifelong Event Detection via Optimal Transport (LEDOT), that leverages optimal transport principles to align the optimization of our classification module with the intrinsic nature of each class, as defined by their pre-trained language modeling. Our method integrates replay sets, prototype latent representations, and an innovative Optimal Transport component. Extensive experiments on MAVEN and ACE datasets demonstrate LEDOT's superior performance, consistently outperforming state-of-the-art baselines. The results underscore LEDOT as a pioneering solution in continual event detection, offering a more effective and nuanced approach to addressing catastrophic forgetting in evolving environments. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.08905v1-abstract-full').style.display = 'none'; document.getElementById('2410.08905v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04327">arXiv:2410.04327</a> <span> [<a href="https://arxiv.org/pdf/2410.04327">pdf</a>, <a href="https://arxiv.org/ps/2410.04327">ps</a>, <a href="https://arxiv.org/format/2410.04327">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Leveraging Hierarchical Taxonomies in Prompt-based Continual Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Tran%2C+Q">Quyen Tran</a>, <a href="/search/?searchtype=author&query=Phan%2C+H">Hoang Phan</a>, <a href="/search/?searchtype=author&query=Le%2C+M">Minh Le</a>, <a href="/search/?searchtype=author&query=Truong%2C+T">Tuan Truong</a>, <a href="/search/?searchtype=author&query=Phung%2C+D">Dinh Phung</a>, <a href="/search/?searchtype=author&query=Ngo%2C+L">Linh Ngo</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+T">Thien Nguyen</a>, <a href="/search/?searchtype=author&query=Ho%2C+N">Nhat Ho</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Trung Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04327v2-abstract-short" style="display: inline;"> Drawing inspiration from human learning behaviors, this work proposes a novel approach to mitigate catastrophic forgetting in Prompt-based Continual Learning models by exploiting the relationships between continuously emerging class data. We find that applying human habits of organizing and connecting information can serve as an efficient strategy when training deep learning models. Specifically,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04327v2-abstract-full').style.display = 'inline'; document.getElementById('2410.04327v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04327v2-abstract-full" style="display: none;"> Drawing inspiration from human learning behaviors, this work proposes a novel approach to mitigate catastrophic forgetting in Prompt-based Continual Learning models by exploiting the relationships between continuously emerging class data. We find that applying human habits of organizing and connecting information can serve as an efficient strategy when training deep learning models. Specifically, by building a hierarchical tree structure based on the expanding set of labels, we gain fresh insights into the data, identifying groups of similar classes could easily cause confusion. Additionally, we delve deeper into the hidden connections between classes by exploring the original pretrained model's behavior through an optimal transport-based approach. From these insights, we propose a novel regularization loss function that encourages models to focus more on challenging knowledge areas, thereby enhancing overall performance. Experimentally, our method demonstrated significant superiority over the most robust state-of-the-art models on various benchmarks. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04327v2-abstract-full').style.display = 'none'; document.getElementById('2410.04327v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.04196">arXiv:2410.04196</a> <span> [<a href="https://arxiv.org/pdf/2410.04196">pdf</a>, <a href="https://arxiv.org/format/2410.04196">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Improving Generalization with Flat Hilbert Bayesian Inference </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Truong%2C+T">Tuan Truong</a>, <a href="/search/?searchtype=author&query=Tran%2C+Q">Quyen Tran</a>, <a href="/search/?searchtype=author&query=Pham-Ngoc%2C+Q">Quan Pham-Ngoc</a>, <a href="/search/?searchtype=author&query=Ho%2C+N">Nhat Ho</a>, <a href="/search/?searchtype=author&query=Phung%2C+D">Dinh Phung</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Trung Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.04196v1-abstract-short" style="display: inline;"> We introduce Flat Hilbert Bayesian Inference (FHBI), an algorithm designed to enhance 
generalization in Bayesian inference. Our approach involves an iterative two-step procedure with an adversarial functional perturbation step and a functional descent step within the reproducing kernel Hilbert spaces. This methodology is supported by a theoretical analysis that extends previous findings on general… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04196v1-abstract-full').style.display = 'inline'; document.getElementById('2410.04196v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.04196v1-abstract-full" style="display: none;"> We introduce Flat Hilbert Bayesian Inference (FHBI), an algorithm designed to enhance generalization in Bayesian inference. Our approach involves an iterative two-step procedure with an adversarial functional perturbation step and a functional descent step within the reproducing kernel Hilbert spaces. This methodology is supported by a theoretical analysis that extends previous findings on generalization ability from finite-dimensional Euclidean spaces to infinite-dimensional functional spaces. To evaluate the effectiveness of FHBI, we conduct comprehensive comparisons against seven baseline methods on the VTAB-1K benchmark, which encompasses 19 diverse datasets across various domains with diverse semantics. Empirical results demonstrate that FHBI consistently outperforms the baselines by notable margins, highlighting its practical efficacy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.04196v1-abstract-full').style.display = 'none'; document.getElementById('2410.04196v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02954">arXiv:2410.02954</a> <span> [<a href="https://arxiv.org/pdf/2410.02954">pdf</a>, <a href="https://arxiv.org/ps/2410.02954">ps</a>, <a href="https://arxiv.org/format/2410.02954">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Digital Twin for O-RAN Towards 6G </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Nguyen%2C+H+X">Huan X. Nguyen</a>, <a href="/search/?searchtype=author&query=Sun%2C+K">Kexuan Sun</a>, <a href="/search/?searchtype=author&query=To%2C+D">Duc To</a>, <a href="/search/?searchtype=author&query=Vien%2C+Q">Quoc-Tuan Vien</a>, <a href="/search/?searchtype=author&query=Le%2C+T+A">Tuan Anh Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02954v1-abstract-short" style="display: inline;"> In future wireless systems of beyond 5G and 6G, addressing diverse applications with varying quality requirements is essential. Open Radio Access Network (O-RAN) architectures offer the potential for dynamic resource adaptation based on traffic demands. However, achieving real-time resource orchestration remains a challenge. 
Simultaneously, Digital Twin (DT) technology holds promise for testing an… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02954v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02954v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02954v1-abstract-full" style="display: none;"> In future wireless systems of beyond 5G and 6G, addressing diverse applications with varying quality requirements is essential. Open Radio Access Network (O-RAN) architectures offer the potential for dynamic resource adaptation based on traffic demands. However, achieving real-time resource orchestration remains a challenge. Simultaneously, Digital Twin (DT) technology holds promise for testing and analysing complex systems, offering a unique platform for addressing dynamic operation and automation in O-RAN architectures. Yet, developing DTs for complex 5G/6G networks poses challenges, including data exchanges, ML model training data availability, network dynamics, processing power limitations, interdisciplinary collaboration needs, and a lack of standardized methodologies. This paper provides an overview of Open RAN architecture, trend and challenges, proposing the DT concepts for O-RAN with solution examples showcasing its integration into the framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02954v1-abstract-full').style.display = 'none'; document.getElementById('2410.02954v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">IEEE Communications Magazine 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02827">arXiv:2410.02827</a> <span> [<a href="https://arxiv.org/pdf/2410.02827">pdf</a>, <a href="https://arxiv.org/format/2410.02827">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Effective Intrusion Detection for UAV Communications using Autoencoder-based Feature Extraction and Machine Learning Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Vuong%2C+T">Tuan-Cuong Vuong</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+C+C">Cong Chi Nguyen</a>, <a href="/search/?searchtype=author&query=Pham%2C+V">Van-Cuong Pham</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Thi-Thanh-Huyen Le</a>, <a href="/search/?searchtype=author&query=Tran%2C+X">Xuan-Nam Tran</a>, <a href="/search/?searchtype=author&query=Van+Luong%2C+T">Thien Van Luong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02827v1-abstract-short" style="display: inline;"> This paper proposes a novel intrusion detection method for unmanned aerial vehicles (UAV) in the presence of recent actual UAV intrusion dataset. 
In particular, in the first stage of our method, we design an autoencoder architecture for effectively extracting important features, which are then fed into various machine learning models in the second stage for detecting and classifying attack types.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02827v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02827v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02827v1-abstract-full" style="display: none;"> This paper proposes a novel intrusion detection method for unmanned aerial vehicles (UAV) in the presence of recent actual UAV intrusion dataset. In particular, in the first stage of our method, we design an autoencoder architecture for effectively extracting important features, which are then fed into various machine learning models in the second stage for detecting and classifying attack types. To the best of our knowledge, this is the first attempt to propose such the autoencoder-based machine learning intrusion detection method for UAVs using actual dataset, while most of existing works only consider either simulated datasets or datasets irrelevant to UAV communications. Our experiment results show that the proposed method outperforms the baselines such as feature selection schemes in both binary and multi-class classification tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02827v1-abstract-full').style.display = 'none'; document.getElementById('2410.02827v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> NOLTA 2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.02200">arXiv:2410.02200</a> <span> [<a href="https://arxiv.org/pdf/2410.02200">pdf</a>, <a href="https://arxiv.org/format/2410.02200">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Revisiting Prefix-tuning: Statistical Benefits of Reparameterization among Prompts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Le%2C+M">Minh Le</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+C">Chau Nguyen</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+H">Huy Nguyen</a>, <a href="/search/?searchtype=author&query=Tran%2C+Q">Quyen Tran</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Trung Le</a>, <a href="/search/?searchtype=author&query=Ho%2C+N">Nhat Ho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.02200v1-abstract-short" style="display: inline;"> Prompt-based techniques, such as prompt-tuning and prefix-tuning, have gained prominence for their efficiency in fine-tuning large pre-trained models. Despite their widespread adoption, the theoretical foundations of these methods remain limited. 
For instance, in prefix-tuning, we observe that a key factor in achieving performance parity with full fine-tuning lies in the reparameterization strateg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02200v1-abstract-full').style.display = 'inline'; document.getElementById('2410.02200v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.02200v1-abstract-full" style="display: none;"> Prompt-based techniques, such as prompt-tuning and prefix-tuning, have gained prominence for their efficiency in fine-tuning large pre-trained models. Despite their widespread adoption, the theoretical foundations of these methods remain limited. For instance, in prefix-tuning, we observe that a key factor in achieving performance parity with full fine-tuning lies in the reparameterization strategy. However, the theoretical principles underpinning the effectiveness of this approach have yet to be thoroughly examined. Our study demonstrates that reparameterization is not merely an engineering trick but is grounded in deep theoretical foundations. Specifically, we show that the reparameterization strategy implicitly encodes a shared structure between prefix key and value vectors. Building on recent insights into the connection between prefix-tuning and mixture of experts models, we further illustrate that this shared structure significantly improves sample efficiency in parameter estimation compared to non-shared alternatives. The effectiveness of prefix-tuning across diverse tasks is empirically confirmed to be enhanced by the shared structure, through extensive experiments in both visual and language domains. Additionally, we uncover similar structural benefits in prompt-tuning, offering new perspectives on its success. Our findings provide theoretical and empirical contributions, advancing the understanding of prompt-based methods and their underlying mechanisms. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.02200v1-abstract-full').style.display = 'none'; document.getElementById('2410.02200v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Minh Le, Chau Nguyen, Huy Nguyen contributed equally to this work. 50 pages, 8 tables, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00530">arXiv:2410.00530</a> <span> [<a href="https://arxiv.org/pdf/2410.00530">pdf</a>, <a href="https://arxiv.org/ps/2410.00530">ps</a>, <a href="https://arxiv.org/format/2410.00530">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Analysis of PDEs">math.AP</span> </div> </div> <p class="title is-5 mathjax"> On (discounted) global Eikonal equations in metric spaces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=L%C3%AA%2C+T+M">Trí Minh Lê</a>, <a href="/search/?searchtype=author&query=Tapia-Garc%C3%ADa%2C+S">Sebastián Tapia-García</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00530v1-abstract-short" style="display: inline;"> Eikonal equations in metric spaces have strong connections with the local slope operator (or the De Giorgi slope). 
In this manuscript, we explore and delve into an analogous model based on the global slope operator, expressed as $λu + G[u] = \ell$, where $λ\geq 0$. In strong contrast with the classical theory, the global slope operator relies neither on the local properties of the functions nor on… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00530v1-abstract-full').style.display = 'inline'; document.getElementById('2410.00530v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00530v1-abstract-full" style="display: none;"> Eikonal equations in metric spaces have strong connections with the local slope operator (or the De Giorgi slope). In this manuscript, we explore and delve into an analogous model based on the global slope operator, expressed as $λu + G[u] = \ell$, where $λ\geq 0$. In strong contrast with the classical theory, the global slope operator relies neither on the local properties of the functions nor on the structure of the space, and therefore new insights are developed in order to analyze the above equation. Under mild assumptions on the metric space $X$ and the given data $\ell$, we primarily discuss: $(a)$ the existence and uniqueness of (pointwise) solutions; $(b)$ a viscosity perspective and the employment of Perron's method to consider the maximal solution; $(c)$ stability of the maximal solution with respect to both, the data $\ell$ and the discount factor $λ$. Our techniques provide a method to approximate solutions of Eikonal equations in metric spaces and a new integration formula based on the global slope of the given function. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00530v1-abstract-full').style.display = 'none'; document.getElementById('2410.00530v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">41 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 35F21; 30L99 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2410.00334">arXiv:2410.00334</a> <span> [<a href="https://arxiv.org/pdf/2410.00334">pdf</a>, <a href="https://arxiv.org/format/2410.00334">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Preserving Generalization of Language models in Few-shot Continual Relation Extraction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Tran%2C+Q">Quyen Tran</a>, <a href="/search/?searchtype=author&query=Thanh%2C+N+X">Nguyen Xuan Thanh</a>, <a href="/search/?searchtype=author&query=Anh%2C+N+H">Nguyen Hoang Anh</a>, <a href="/search/?searchtype=author&query=Hai%2C+N+L">Nam Le Hai</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Trung Le</a>, <a href="/search/?searchtype=author&query=Van+Ngo%2C+L">Linh Van Ngo</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+T+H">Thien Huu 
Nguyen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2410.00334v1-abstract-short" style="display: inline;"> Few-shot Continual Relations Extraction (FCRE) is an emerging and dynamic area of study where models can sequentially integrate knowledge from new relations with limited labeled data while circumventing catastrophic forgetting and preserving prior knowledge from pre-trained backbones. In this work, we introduce a novel method that leverages often-discarded language model heads. By employing these… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00334v1-abstract-full').style.display = 'inline'; document.getElementById('2410.00334v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2410.00334v1-abstract-full" style="display: none;"> Few-shot Continual Relations Extraction (FCRE) is an emerging and dynamic area of study where models can sequentially integrate knowledge from new relations with limited labeled data while circumventing catastrophic forgetting and preserving prior knowledge from pre-trained backbones. In this work, we introduce a novel method that leverages often-discarded language model heads. By employing these components via a mutual information maximization strategy, our approach helps maintain prior knowledge from the pre-trained backbone and strategically aligns the primary classification head, thereby enhancing model performance. Furthermore, we explore the potential of Large Language Models (LLMs), renowned for their wealth of knowledge, in addressing FCRE challenges. Our comprehensive experimental results underscore the efficacy of the proposed method and offer valuable insights for future work. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2410.00334v1-abstract-full').style.display = 'none'; document.getElementById('2410.00334v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to EMNLP 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.19871">arXiv:2409.19871</a> <span> [<a href="https://arxiv.org/pdf/2409.19871">pdf</a>, <a href="https://arxiv.org/format/2409.19871">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> TSI: A Multi-View Representation Learning Approach for Time Series Forecasting </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Gao%2C+W">Wentao Gao</a>, <a href="/search/?searchtype=author&query=Xu%2C+Z">Ziqi Xu</a>, <a href="/search/?searchtype=author&query=Li%2C+J">Jiuyong Li</a>, <a href="/search/?searchtype=author&query=Liu%2C+L">Lin Liu</a>, <a href="/search/?searchtype=author&query=Liu%2C+J">Jixue Liu</a>, <a href="/search/?searchtype=author&query=Le%2C+T+D">Thuc Duy Le</a>, <a href="/search/?searchtype=author&query=Cheng%2C+D">Debo Cheng</a>, <a href="/search/?searchtype=author&query=Zhao%2C+Y">Yanchang Zhao</a>, <a href="/search/?searchtype=author&query=Chen%2C+Y">Yun Chen</a> </p> <p 
class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.19871v1-abstract-short" style="display: inline;"> As the growing demand for long sequence time-series forecasting in real-world applications, such as electricity consumption planning, the significance of time series forecasting becomes increasingly crucial across various domains. This is highlighted by recent advancements in representation learning within the field. This study introduces a novel multi-view approach for time series forecasting tha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19871v1-abstract-full').style.display = 'inline'; document.getElementById('2409.19871v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.19871v1-abstract-full" style="display: none;"> As the growing demand for long sequence time-series forecasting in real-world applications, such as electricity consumption planning, the significance of time series forecasting becomes increasingly crucial across various domains. This is highlighted by recent advancements in representation learning within the field. This study introduces a novel multi-view approach for time series forecasting that innovatively integrates trend and seasonal representations with an Independent Component Analysis (ICA)-based representation. Recognizing the limitations of existing methods in representing complex and high-dimensional time series data, this research addresses the challenge by combining TS (trend and seasonality) and ICA (independent components) perspectives. This approach offers a holistic understanding of time series data, going beyond traditional models that often miss nuanced, nonlinear relationships. 
The efficacy of TSI model is demonstrated through comprehensive testing on various benchmark datasets, where it shows superior performance over current state-of-the-art models, particularly in multivariate forecasting. This method not only enhances the accuracy of forecasting but also contributes significantly to the field by providing a more in-depth understanding of time series data. The research which uses ICA for a view lays the groundwork for further exploration and methodological advancements in time series forecasting, opening new avenues for research and practical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.19871v1-abstract-full').style.display = 'none'; document.getElementById('2409.19871v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">AJCAI Oral Accepted</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.18288">arXiv:2409.18288</a> <span> [<a href="https://arxiv.org/pdf/2409.18288">pdf</a>, <a href="https://arxiv.org/format/2409.18288">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Detectors">physics.ins-det</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> </div> </div> <p class="title is-5 mathjax"> The hypothetical track-length fitting algorithm for energy measurement in liquid argon TPCs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=DUNE+Collaboration"> DUNE Collaboration</a>, <a href="/search/?searchtype=author&query=Abud%2C+A+A">A. Abed Abud</a>, <a href="/search/?searchtype=author&query=Abi%2C+B">B. Abi</a>, <a href="/search/?searchtype=author&query=Acciarri%2C+R">R. Acciarri</a>, <a href="/search/?searchtype=author&query=Acero%2C+M+A">M. A. Acero</a>, <a href="/search/?searchtype=author&query=Adames%2C+M+R">M. R. Adames</a>, <a href="/search/?searchtype=author&query=Adamov%2C+G">G. Adamov</a>, <a href="/search/?searchtype=author&query=Adamowski%2C+M">M. Adamowski</a>, <a href="/search/?searchtype=author&query=Adams%2C+D">D. Adams</a>, <a href="/search/?searchtype=author&query=Adinolfi%2C+M">M. Adinolfi</a>, <a href="/search/?searchtype=author&query=Adriano%2C+C">C. Adriano</a>, <a href="/search/?searchtype=author&query=Aduszkiewicz%2C+A">A. Aduszkiewicz</a>, <a href="/search/?searchtype=author&query=Aguilar%2C+J">J. Aguilar</a>, <a href="/search/?searchtype=author&query=Akbar%2C+F">F. 
Akbar</a>, <a href="/search/?searchtype=author&query=Alex%2C+N+S">N. S. Alex</a>, <a href="/search/?searchtype=author&query=Allison%2C+K">K. Allison</a>, <a href="/search/?searchtype=author&query=Monsalve%2C+S+A">S. Alonso Monsalve</a>, <a href="/search/?searchtype=author&query=Alrashed%2C+M">M. Alrashed</a>, <a href="/search/?searchtype=author&query=Alton%2C+A">A. Alton</a>, <a href="/search/?searchtype=author&query=Alvarez%2C+R">R. Alvarez</a>, <a href="/search/?searchtype=author&query=Alves%2C+T">T. Alves</a>, <a href="/search/?searchtype=author&query=Amar%2C+H">H. Amar</a>, <a href="/search/?searchtype=author&query=Amedo%2C+P">P. Amedo</a>, <a href="/search/?searchtype=author&query=Anderson%2C+J">J. Anderson</a>, <a href="/search/?searchtype=author&query=Andreopoulos%2C+C">C. Andreopoulos</a> , et al. (1348 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.18288v2-abstract-short" style="display: inline;"> This paper introduces the hypothetical track-length fitting algorithm, a novel method for measuring the kinetic energies of ionizing particles in liquid argon time projection chambers (LArTPCs). 
The algorithm finds the most probable offset in track length for a track-like object by comparing the measured ionization density as a function of position with a theoretical prediction of the energy loss… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18288v2-abstract-full').style.display = 'inline'; document.getElementById('2409.18288v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.18288v2-abstract-full" style="display: none;"> This paper introduces the hypothetical track-length fitting algorithm, a novel method for measuring the kinetic energies of ionizing particles in liquid argon time projection chambers (LArTPCs). The algorithm finds the most probable offset in track length for a track-like object by comparing the measured ionization density as a function of position with a theoretical prediction of the energy loss as a function of the energy, including models of electron recombination and detector response. The algorithm can be used to measure the energies of particles that interact before they stop, such as charged pions that are absorbed by argon nuclei. The algorithm's energy measurement resolutions and fractional biases are presented as functions of particle kinetic energy and number of track hits using samples of stopping secondary charged pions in data collected by the ProtoDUNE-SP detector, and also in a detailed simulation. Additional studies describe impact of the dE/dx model on energy measurement performance. The method described in this paper to characterize the energy measurement performance can be repeated in any LArTPC experiment using stopping secondary charged pions. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.18288v2-abstract-full').style.display = 'none'; document.getElementById('2409.18288v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> FERMILAB-PUB-24-0561-LBNF-PPD, CERN-EP-2024-256 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.15246">arXiv:2409.15246</a> <span> [<a href="https://arxiv.org/pdf/2409.15246">pdf</a>, <a href="https://arxiv.org/format/2409.15246">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> On-Air Deep Learning Integrated Semantic Inference Models for Enhanced Earth Observation Satellite Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Chou%2C+H">Hong-fu Chou</a>, <a href="/search/?searchtype=author&query=Ha%2C+V+N">Vu Nguyen Ha</a>, <a href="/search/?searchtype=author&query=Thiruvasagam%2C+P">Prabhu Thiruvasagam</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Thanh-Dung Le</a>, <a href="/search/?searchtype=author&query=Eappen%2C+G">Geoffrey Eappen</a>, <a 
href="/search/?searchtype=author&query=Nguyen%2C+T+T">Ti Ti Nguyen</a>, <a href="/search/?searchtype=author&query=Garces-Socarras%2C+L+M">Luis M. Garces-Socarras</a>, <a href="/search/?searchtype=author&query=Gonzalez-Rios%2C+J+L">Jorge L. Gonzalez-Rios</a>, <a href="/search/?searchtype=author&query=Merlano-Duncan%2C+J+C">Juan Carlos Merlano-Duncan</a>, <a href="/search/?searchtype=author&query=Chatzinotas%2C+S">Symeon Chatzinotas</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.15246v3-abstract-short" style="display: inline;"> Earth Observation (EO) systems are crucial for cartography, disaster surveillance, and resource administration. Nonetheless, they encounter considerable obstacles in the processing and transmission of extensive data, especially in specialized domains such as precision agriculture and real-time disaster response. Earth observation satellites, outfitted with remote sensing technology, gather data fr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15246v3-abstract-full').style.display = 'inline'; document.getElementById('2409.15246v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.15246v3-abstract-full" style="display: none;"> Earth Observation (EO) systems are crucial for cartography, disaster surveillance, and resource administration. Nonetheless, they encounter considerable obstacles in the processing and transmission of extensive data, especially in specialized domains such as precision agriculture and real-time disaster response. Earth observation satellites, outfitted with remote sensing technology, gather data from onboard sensors and IoT-enabled terrestrial objects, delivering important information remotely. 
Domain-adapted Large Language Models (LLMs) provide a solution by enabling the integration of raw and processed EO data. Through domain adaptation, LLMs improve the assimilation and analysis of many data sources, tackling the intricacies of specialized datasets in agriculture and disaster response. This data synthesis, directed by LLMs, enhances the precision and pertinence of conveyed information. This study provides a thorough examination of using semantic inference and deep learning for sophisticated EO systems. It presents an innovative architecture for semantic communication in EO satellite networks, designed to improve data transmission efficiency using semantic processing methodologies. Recent advancements in onboard processing technologies enable dependable, adaptable, and energy-efficient data management in orbit. These improvements guarantee reliable performance in adverse space circumstances using radiation-hardened and reconfigurable technology. Collectively, these advancements enable next-generation satellite missions with improved processing capabilities, crucial for operational flexibility and real-time decision-making in 6G satellite communication. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.15246v3-abstract-full').style.display = 'none'; document.getElementById('2409.15246v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 7 figures, Journal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.13057">arXiv:2409.13057</a> <span> [<a href="https://arxiv.org/pdf/2409.13057">pdf</a>, <a href="https://arxiv.org/format/2409.13057">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Natural Language Processing Methods for the Study of Protein-Ligand Interactions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Michels%2C+J">James Michels</a>, <a href="/search/?searchtype=author&query=Bandarupalli%2C+R">Ramya Bandarupalli</a>, <a href="/search/?searchtype=author&query=Akbari%2C+A+A">Amin Ahangar Akbari</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Thai Le</a>, <a href="/search/?searchtype=author&query=Xiao%2C+H">Hong Xiao</a>, <a href="/search/?searchtype=author&query=Li%2C+J">Jing Li</a>, <a href="/search/?searchtype=author&query=Hom%2C+E+F+Y">Erik F. Y. Hom</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.13057v2-abstract-short" style="display: inline;"> Recent advances in Natural Language Processing (NLP) have ignited interest in developing effective methods for predicting protein-ligand interactions (PLIs) given their relevance to drug discovery and protein engineering efforts and the ever-growing volume of biochemical sequence and structural data available. 
The parallels between human languages and the "languages" used to represent proteins and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13057v2-abstract-full').style.display = 'inline'; document.getElementById('2409.13057v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.13057v2-abstract-full" style="display: none;"> Recent advances in Natural Language Processing (NLP) have ignited interest in developing effective methods for predicting protein-ligand interactions (PLIs) given their relevance to drug discovery and protein engineering efforts and the ever-growing volume of biochemical sequence and structural data available. The parallels between human languages and the "languages" used to represent proteins and ligands have enabled the use of NLP machine learning approaches to advance PLI studies. In this review, we explain where and how such approaches have been applied in the recent literature and discuss useful mechanisms such as long short-term memory, transformers, and attention. We conclude with a discussion of the current limitations of NLP methods for the study of PLIs as well as key challenges that need to be addressed in future work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.13057v2-abstract-full').style.display = 'none'; document.getElementById('2409.13057v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 19 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">52 Pages and 3 Figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11993">arXiv:2409.11993</a> <span> [<a href="https://arxiv.org/pdf/2409.11993">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Physics">physics.comp-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> </div> </div> <p class="title is-5 mathjax"> Modeling water radiolysis with Geant4-DNA: Impact of the temporal structure of the irradiation pulse under oxygen conditions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Le%2C+T+A">Tuan Anh Le</a>, <a href="/search/?searchtype=author&query=Tran%2C+H+N">Hoang Ngoc Tran</a>, <a href="/search/?searchtype=author&query=Fattori%2C+S">Serena Fattori</a>, <a href="/search/?searchtype=author&query=Phan%2C+V+C">Viet Cuong Phan</a>, <a href="/search/?searchtype=author&query=Incerti%2C+S">Sebastien Incerti</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11993v1-abstract-short" style="display: inline;"> The differences in H2O2 production between conventional (CONV) and ultra-high dose rate (UHDR) irradiations in water radiolysis are still not fully understood. 
The lower levels of this radiolytic species, as a critical end product of water radiolysis, are particularly relevant for investigating the connection between the high-density energy deposition during short-duration physical events (ionizat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11993v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11993v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11993v1-abstract-full" style="display: none;"> The differences in H2O2 production between conventional (CONV) and ultra-high dose rate (UHDR) irradiations in water radiolysis are still not fully understood. The lower levels of this radiolytic species, as a critical end product of water radiolysis, are particularly relevant for investigating the connection between the high-density energy deposition during short-duration physical events (ionizations or excitations) and biological responses of the FLASH effect. In this study, we developed a new Geant4-DNA chemistry model to simulate radiolysis considering the time structure of the irradiation pulse at different absorbed doses to liquid water of 0.01, 0.1, 1, and 2 Gy under 1 MeV electron irradiation. The model allows the description of the beam's temporal structure, including the pulse duration, the pulse repetition frequency, and the pulse amplitude for the different beam irradiation conditions through a wide dose rate range, from 0.01 Gy/s up to about 105 Gy/s, at various oxygen concentrations. The preliminary results indicate a correlation between the temporal structure of the pulses and a significant reduction in the production of reactive oxygen species (ROS) at different dose rates. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11993v1-abstract-full').style.display = 'none'; document.getElementById('2409.11993v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 14 figures including 3 figures in appendix</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.11124">arXiv:2409.11124</a> <span> [<a href="https://arxiv.org/pdf/2409.11124">pdf</a>, <a href="https://arxiv.org/ps/2409.11124">ps</a>, <a href="https://arxiv.org/format/2409.11124">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Analysis of PDEs">math.AP</span> </div> </div> <p class="title is-5 mathjax"> Comparison principle for general nonlocal Hamilton-Jacobi equations with superlinear gradient </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Ciomaga%2C+A">Adina Ciomaga</a>, <a href="/search/?searchtype=author&query=Le%2C+T+M">Tri Minh Le</a>, <a href="/search/?searchtype=author&query=Ley%2C+O">Olivier Ley</a>, <a href="/search/?searchtype=author&query=Topp%2C+E">Erwin Topp</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.11124v1-abstract-short" style="display: inline;"> We obtain the comparison principle for discontinuous viscosity sub- and supersolutions of nonlocal Hamilton-Jacobi equations, 
with superlinear and coercive gradient terms. The nonlocal terms are integro-differential operators in Lévy form, with general measures: $x$-dependent, possibly degenerate and without any restriction on the order. The measures must satisfy a combined Wasserstein/Total Varia… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11124v1-abstract-full').style.display = 'inline'; document.getElementById('2409.11124v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.11124v1-abstract-full" style="display: none;"> We obtain the comparison principle for discontinuous viscosity sub- and supersolutions of nonlocal Hamilton-Jacobi equations, with superlinear and coercive gradient terms. The nonlocal terms are integro-differential operators in Lévy form, with general measures: $x$-dependent, possibly degenerate and without any restriction on the order. The measures must satisfy a combined Wasserstein/Total Variation-continuity assumption, which is one of the weakest conditions used in the context of viscosity approach for this type of integro-differential PDEs. The proof relies on a regularizing effect due to the gradient growth. We present several examples of applications to PDEs with different types of nonlocal operators (measures with density, operators of variable order, Lévy-Itô operators). <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.11124v1-abstract-full').style.display = 'none'; document.getElementById('2409.11124v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.08337">arXiv:2409.08337</a> <span> [<a href="https://arxiv.org/pdf/2409.08337">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> X-ray Fluoroscopy Guided Localization and Steering of Medical Microrobots through Virtual Enhancement </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Alabay%2C+H+H">Husnu Halid Alabay</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Tuan-Anh Le</a>, <a href="/search/?searchtype=author&query=Ceylan%2C+H">Hakan Ceylan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.08337v1-abstract-short" style="display: inline;"> In developing medical interventions using untethered milli- and microrobots, ensuring safety and effectiveness relies on robust methods for detection, real-time tracking, and precise localization within the body. However, the inherent non-transparency of the human body poses a significant obstacle, limiting robot detection primarily to specialized imaging systems such as X-ray fluoroscopy, which o… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08337v1-abstract-full').style.display = 'inline'; document.getElementById('2409.08337v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.08337v1-abstract-full" style="display: none;"> In developing medical interventions using untethered milli- and microrobots, ensuring safety and effectiveness relies on robust methods for detection, real-time tracking, and precise localization within the body. 
However, the inherent non-transparency of the human body poses a significant obstacle, limiting robot detection primarily to specialized imaging systems such as X-ray fluoroscopy, which often lack crucial anatomical details. Consequently, the robot operator (human or machine) would encounter severe challenges in accurately determining the location of the robot and steering its motion. This study explores the feasibility of circumventing this challenge by creating a simulation environment that contains the precise digital replica (virtual twin) of a model microrobot operational workspace. Synchronizing coordinate systems between the virtual and real worlds and continuously integrating microrobot position data from the image stream into the virtual twin allows the microrobot operator to control navigation in the virtual world. We validate this concept by demonstrating the tracking and steering of a mobile magnetic robot in confined phantoms with high temporal resolution (&lt; 100 ms, with an average of ~20 ms) visual feedback. Additionally, our object detection-based localization approach offers the potential to reduce overall patient exposure to X-ray doses during continuous microrobot tracking without compromising tracking accuracy. Ultimately, we address a critical gap in developing image-guided remote interventions with untethered medical microrobots, particularly for near-future applications in animal models and human patients. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.08337v1-abstract-full').style.display = 'none'; document.getElementById('2409.08337v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.06233">arXiv:2409.06233</a> <span> [<a href="https://arxiv.org/pdf/2409.06233">pdf</a>, <a href="https://arxiv.org/format/2409.06233">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> VBIT: Towards Enhancing Privacy Control Over IoT Devices </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Aaraj%2C+J+A">Jad Al Aaraj</a>, <a href="/search/?searchtype=author&query=Figueira%2C+O">Olivia Figueira</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Tu Le</a>, <a href="/search/?searchtype=author&query=Figueira%2C+I">Isabela Figueira</a>, <a href="/search/?searchtype=author&query=Trimananda%2C+R">Rahmadi Trimananda</a>, <a href="/search/?searchtype=author&query=Markopoulou%2C+A">Athina Markopoulou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.06233v1-abstract-short" style="display: inline;"> Internet-of-Things (IoT) devices are increasingly deployed at home, at work, and in other shared and public spaces. IoT devices collect and share data with service providers and third parties, which poses privacy concerns. 
Although privacy enhancing tools are quite advanced in other applications domains (e.g., advertising and tracker blockers for browsers), users have currently no convenient way to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06233v1-abstract-full').style.display = 'inline'; document.getElementById('2409.06233v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.06233v1-abstract-full" style="display: none;"> Internet-of-Things (IoT) devices are increasingly deployed at home, at work, and in other shared and public spaces. IoT devices collect and share data with service providers and third parties, which poses privacy concerns. Although privacy enhancing tools are quite advanced in other applications domains (e.g., advertising and tracker blockers for browsers), users have currently no convenient way to know or manage what and how data is collected and shared by IoT devices. In this paper, we present VBIT, an interactive system combining Mixed Reality (MR) and web-based applications that allows users to: (1) uncover and visualize tracking services by IoT devices in an instrumented space and (2) take action to stop or limit that tracking. We design and implement VBIT to operate at the network traffic level, and we show that it has negligible performance overhead, and offers flexibility and good usability. We perform a mixed-method user study consisting of an online survey and an in-person interview study. We show that VBIT users appreciate VBIT's transparency, control, and customization features, and they become significantly more willing to install an IoT advertising and tracking blocker, after using VBIT. In the process, we obtain design insights that can be used to further iterate and improve the design of VBIT and other systems for IoT transparency and control. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.06233v1-abstract-full').style.display = 'none'; document.getElementById('2409.06233v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.05924">arXiv:2409.05924</a> <span> [<a href="https://arxiv.org/pdf/2409.05924">pdf</a>, <a href="https://arxiv.org/format/2409.05924">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Continuous Learning of Transformer-based Audio Deepfake Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Le%2C+T+D+N">Tuan Duy Nguyen Le</a>, <a href="/search/?searchtype=author&query=Teh%2C+K+K">Kah Kuan Teh</a>, <a href="/search/?searchtype=author&query=Tran%2C+H+D">Huy Dat Tran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.05924v1-abstract-short" style="display: inline;"> This paper proposes a novel framework for audio deepfake detection with two main objectives: i) attaining the highest possible accuracy on available fake data, and ii) effectively performing continuous learning on new fake data in a few-shot learning manner. Specifically, we conduct a large audio deepfake collection using various deep audio generation methods. 
The data is further enhanced with add… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05924v1-abstract-full').style.display = 'inline'; document.getElementById('2409.05924v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.05924v1-abstract-full" style="display: none;"> This paper proposes a novel framework for audio deepfake detection with two main objectives: i) attaining the highest possible accuracy on available fake data, and ii) effectively performing continuous learning on new fake data in a few-shot learning manner. Specifically, we conduct a large audio deepfake collection using various deep audio generation methods. The data is further enhanced with additional augmentation methods to increase variations amidst compressions, far-field recordings, noise, and other distortions. We then adopt the Audio Spectrogram Transformer for the audio deepfake detection model. Accordingly, the proposed method achieves promising performance on various benchmark datasets. Furthermore, we present a continuous learning plugin module to update the trained model most effectively with the fewest possible labeled data points of the new fake type. The proposed method outperforms the conventional direct fine-tuning approach with much fewer labeled data points. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05924v1-abstract-full').style.display = 'none'; document.getElementById('2409.05924v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to INTERSPEECH 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.05280">arXiv:2409.05280</a> <span> [<a href="https://arxiv.org/pdf/2409.05280">pdf</a>, <a href="https://arxiv.org/format/2409.05280">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> RotCAtt-TransUNet++: Novel Deep Neural Network for Sophisticated Cardiac Segmentation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Nguyen-Le%2C+Q">Quoc-Bao Nguyen-Le</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Tuan-Hy Le</a>, <a href="/search/?searchtype=author&query=Do%2C+A">Anh-Triet Do</a>, <a href="/search/?searchtype=author&query=Trinh%2C+Q">Quoc-Huy Trinh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.05280v2-abstract-short" style="display: inline;"> Cardiovascular disease remains a predominant global health concern, responsible for a significant portion of mortality worldwide. Accurate segmentation of cardiac medical imaging data is pivotal in mitigating fatality rates associated with cardiovascular conditions. 
However, existing state-of-the-art (SOTA) neural networks, including both CNN-based and Transformer-based approaches, exhibit limitat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05280v2-abstract-full').style.display = 'inline'; document.getElementById('2409.05280v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.05280v2-abstract-full" style="display: none;"> Cardiovascular disease remains a predominant global health concern, responsible for a significant portion of mortality worldwide. Accurate segmentation of cardiac medical imaging data is pivotal in mitigating fatality rates associated with cardiovascular conditions. However, existing state-of-the-art (SOTA) neural networks, including both CNN-based and Transformer-based approaches, exhibit limitations in practical applicability due to their inability to effectively capture inter-slice connections alongside intra-slice information. This deficiency is particularly evident in datasets featuring intricate, long-range details along the z-axis, such as coronary arteries in axial views. Additionally, SOTA methods fail to differentiate non-cardiac components from myocardium in segmentation, leading to the "spraying" phenomenon. To address these challenges, we present RotCAtt-TransUNet++, a novel architecture tailored for robust segmentation of complex cardiac structures. Our approach emphasizes modeling global contexts by aggregating multiscale features with nested skip connections in the encoder. It integrates transformer layers to capture interactions between patches and employs a rotatory attention mechanism to capture connectivity between multiple slices (inter-slice information). Additionally, a channel-wise cross-attention gate guides the fused multi-scale channel-wise information and features from decoder stages to bridge semantic gaps. 
Experimental results demonstrate that our proposed model outperforms existing SOTA approaches across four cardiac datasets and one abdominal dataset. Importantly, coronary arteries and myocardium are annotated with near-perfect accuracy during inference. An ablation study shows that the rotatory attention mechanism effectively transforms embedded vectorized patches in the semantic dimensional space, enhancing segmentation accuracy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.05280v2-abstract-full').style.display = 'none'; document.getElementById('2409.05280v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 11 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> MAPR2024 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.04228">arXiv:2409.04228</a> <span> [<a href="https://arxiv.org/pdf/2409.04228">pdf</a>, <a href="https://arxiv.org/ps/2409.04228">ps</a>, <a href="https://arxiv.org/format/2409.04228">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Firefly Algorithm for Movable Antenna Arrays </p> <p 
class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Hoang%2C+M+K">Manh Kha Hoang</a>, <a href="/search/?searchtype=author&query=Le%2C+T+A">Tuan Anh Le</a>, <a href="/search/?searchtype=author&query=Thuc%2C+K">Kieu-Xuan Thuc</a>, <a href="/search/?searchtype=author&query=Van+Luyen%2C+T">Tong Van Luyen</a>, <a href="/search/?searchtype=author&query=Yang%2C+X">Xin-She Yang</a>, <a href="/search/?searchtype=author&query=Ng%2C+D+W+K">Derrick Wing Kwan Ng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.04228v1-abstract-short" style="display: inline;"> This letter addresses a multivariate optimization problem for linear movable antenna arrays (MAAs). Particularly, the position and beamforming vectors of the under-investigated MAA are optimized simultaneously to maximize the minimum beamforming gain across several intended directions, while ensuring interference levels at various unintended directions remain below specified thresholds. To this en… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.04228v1-abstract-full').style.display = 'inline'; document.getElementById('2409.04228v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.04228v1-abstract-full" style="display: none;"> This letter addresses a multivariate optimization problem for linear movable antenna arrays (MAAs). Particularly, the position and beamforming vectors of the under-investigated MAA are optimized simultaneously to maximize the minimum beamforming gain across several intended directions, while ensuring interference levels at various unintended directions remain below specified thresholds. 
To this end, a swarm-intelligence-based firefly algorithm (FA) is introduced to acquire an effective solution to the optimization problem. Simulation results reveal the superior performance of the proposed FA approach compared to the state-of-the-art approach employing alternating optimization and successive convex approximation. This is attributed to the FA's effectiveness in handling non-convex multivariate and multimodal optimization problems without resorting approximations. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.04228v1-abstract-full').style.display = 'none'; document.getElementById('2409.04228v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.03901">arXiv:2409.03901</a> <span> [<a href="https://arxiv.org/pdf/2409.03901">pdf</a>, <a href="https://arxiv.org/format/2409.03901">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Onboard Satellite Image Classification for Earth Observation: A Comparative Study of ViT Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Le%2C+T">Thanh-Dung Le</a>, <a href="/search/?searchtype=author&query=Ha%2C+V+N">Vu Nguyen Ha</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+T+T">Ti Ti Nguyen</a>, <a href="/search/?searchtype=author&query=Eappen%2C+G">Geoffrey 
Eappen</a>, <a href="/search/?searchtype=author&query=Thiruvasagam%2C+P">Prabhu Thiruvasagam</a>, <a href="/search/?searchtype=author&query=Garces-Socarras%2C+L+M">Luis M. Garces-Socarras</a>, <a href="/search/?searchtype=author&query=Chou%2C+H">Hong-fu Chou</a>, <a href="/search/?searchtype=author&query=Gonzalez-Rios%2C+J+L">Jorge L. Gonzalez-Rios</a>, <a href="/search/?searchtype=author&query=Merlano-Duncan%2C+J+C">Juan Carlos Merlano-Duncan</a>, <a href="/search/?searchtype=author&query=Chatzinotas%2C+S">Symeon Chatzinotas</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.03901v2-abstract-short" style="display: inline;"> This study focuses on identifying the most effective pre-trained model for land use classification in onboard satellite processing, emphasizing achieving high accuracy, computational efficiency, and robustness against noisy data conditions commonly encountered during satellite-based inference. Through extensive experimentation, we compare the performance of traditional CNN-based, ResNet-based, and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03901v2-abstract-full').style.display = 'inline'; document.getElementById('2409.03901v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.03901v2-abstract-full" style="display: none;"> This study focuses on identifying the most effective pre-trained model for land use classification in onboard satellite processing, emphasizing achieving high accuracy, computational efficiency, and robustness against noisy data conditions commonly encountered during satellite-based inference. Through extensive experimentation, we compare the performance of traditional CNN-based, ResNet-based, and various pre-trained vision Transformer models. 
Our findings demonstrate that pre-trained Vision Transformer (ViT) models, particularly MobileViTV2 and EfficientViT-M2, outperform models trained from scratch in terms of accuracy and efficiency. These models achieve high performance with reduced computational requirements and exhibit greater resilience during inference under noisy conditions. While MobileViTV2 has excelled on clean validation data, EfficientViT-M2 has proved more robust when handling noise, making it the most suitable model for onboard satellite EO tasks. Our experimental results demonstrate that EfficientViT-M2 is the optimal choice for reliable and efficient RS-IC in satellite operations, achieving 98.76 % of accuracy, precision, and recall. Precisely, EfficientViT-M2 delivers the highest performance across all metrics, excels in training efficiency (1,000s) and inference time (10s), and demonstrates greater robustness (overall robustness score of 0.79). Consequently, EfficientViT-M2 consumes 63.93 % less power than MobileViTV2 (79.23 W) and 73.26 % less power than SwinTransformer (108.90 W). This highlights its significant advantage in energy efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.03901v2-abstract-full').style.display = 'none'; document.getElementById('2409.03901v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 5 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.02385">arXiv:2409.02385</a> <span> [<a href="https://arxiv.org/pdf/2409.02385">pdf</a>, <a href="https://arxiv.org/format/2409.02385">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Unified Framework with Consistency across Modalities for Human Activity Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Tran%2C+T">Tuyen Tran</a>, <a href="/search/?searchtype=author&query=Le%2C+T+M">Thao Minh Le</a>, <a href="/search/?searchtype=author&query=Tran%2C+H">Hung Tran</a>, <a href="/search/?searchtype=author&query=Tran%2C+T">Truyen Tran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.02385v1-abstract-short" style="display: inline;"> Recognizing human activities in videos is challenging due to the spatio-temporal complexity and context-dependence of human interactions. Prior studies often rely on single input modalities, such as RGB or skeletal data, limiting their ability to exploit the complementary advantages across modalities. Recent studies focus on combining these two modalities using simple feature fusion techniques. 
Ho… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02385v1-abstract-full').style.display = 'inline'; document.getElementById('2409.02385v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.02385v1-abstract-full" style="display: none;"> Recognizing human activities in videos is challenging due to the spatio-temporal complexity and context-dependence of human interactions. Prior studies often rely on single input modalities, such as RGB or skeletal data, limiting their ability to exploit the complementary advantages across modalities. Recent studies focus on combining these two modalities using simple feature fusion techniques. However, due to the inherent disparities in representation between these input modalities, designing a unified neural network architecture to effectively leverage their complementary information remains a significant challenge. To address this, we propose a comprehensive multimodal framework for robust video-based human activity recognition. Our key contribution is the introduction of a novel compositional query machine, called COMPUTER ($\textbf{COMP}ositional h\textbf{U}man-cen\textbf{T}ric qu\textbf{ER}y$ machine), a generic neural architecture that models the interactions between a human of interest and its surroundings in both space and time. Thanks to its versatile design, COMPUTER can be leveraged to distill distinctive representations for various input modalities. Additionally, we introduce a consistency loss that enforces agreement in prediction between modalities, exploiting the complementary information from multimodal inputs for robust human movement recognition. Through extensive experiments on action localization and group activity recognition tasks, our approach demonstrates superior performance when compared with state-of-the-art methods. Our code is available at: https://github.com/tranxuantuyen/COMPUTER. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.02385v1-abstract-full').style.display = 'none'; document.getElementById('2409.02385v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to BMVC 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2409.01833">arXiv:2409.01833</a> <span> [<a href="https://arxiv.org/pdf/2409.01833">pdf</a>, <a href="https://arxiv.org/ps/2409.01833">ps</a>, <a href="https://arxiv.org/format/2409.01833">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> On the growth of nonconvex functionals at strict local minimizers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Corella%2C+A+D">Alberto Domínguez Corella</a>, <a href="/search/?searchtype=author&query=L%C3%AA%2C+T+M">Trí Minh Lê</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2409.01833v3-abstract-short" style="display: inline;"> In this paper, we present new equivalent conditions for the growth of proper lower semicontinuous functionals at strict local minimizers. 
The main conditions are a variant of the so-called tilt stability property of local minimizers and an analog of the classic Polyak-Łojasiewicz condition, where the gradient is replaced by linear perturbations. We derive the following tilting principle: stability… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01833v3-abstract-full').style.display = 'inline'; document.getElementById('2409.01833v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2409.01833v3-abstract-full" style="display: none;"> In this paper, we present new equivalent conditions for the growth of proper lower semicontinuous functionals at strict local minimizers. The main conditions are a variant of the so-called tilt stability property of local minimizers and an analog of the classic Polyak-Łojasiewicz condition, where the gradient is replaced by linear perturbations. We derive the following tilting principle: stability of minimizers under linear perturbations can infer their stability under nonlinear ones. We show how growth conditions can be used to give convergence rates for the proximal point algorithm. Finally, we give an application to elliptic tracking problems, establishing a novel equivalence between second-order conditions and the sensitivity of solutions with respect to uncertainty in data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2409.01833v3-abstract-full').style.display = 'none'; document.getElementById('2409.01833v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 October, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 September, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 49J52; 49K40; 90C31; 90C48 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.15353">arXiv:2408.15353</a> <span> [<a href="https://arxiv.org/pdf/2408.15353">pdf</a>, <a href="https://arxiv.org/format/2408.15353">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Populations and Evolution">q-bio.PE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Applications">stat.AP</span> </div> </div> <p class="title is-5 mathjax"> Connecting Mass-action Models and Network Models for Infectious Diseases </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Le%2C+T">Thien-Minh Le</a>, <a href="/search/?searchtype=author&query=Onnela%2C+J">Jukka-Pekka Onnela</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.15353v1-abstract-short" style="display: inline;"> Infectious disease modeling is used to forecast epidemics and assess the effectiveness of intervention strategies. Although the core assumption of mass-action models of homogeneously mixed population is often implausible, they are nevertheless routinely used in studying epidemics and provide useful insights. 
Network models can account for the heterogeneous mixing of populations, which is especiall… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15353v1-abstract-full').style.display = 'inline'; document.getElementById('2408.15353v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.15353v1-abstract-full" style="display: none;"> Infectious disease modeling is used to forecast epidemics and assess the effectiveness of intervention strategies. Although the core assumption of mass-action models of homogeneously mixed population is often implausible, they are nevertheless routinely used in studying epidemics and provide useful insights. Network models can account for the heterogeneous mixing of populations, which is especially important for studying sexually transmitted diseases. Despite the abundance of research on mass-action and network models, the relationship between them is not well understood. Here, we attempt to bridge the gap by first identifying a spreading rule that results in an exact match between disease spreading on a fully connected network and the classic mass-action models. We then propose a method for mapping epidemic spread on arbitrary networks to a form similar to that of mass-action models. We also provide a theoretical justification for the procedure. Finally, we show the advantages of the proposed methods using synthetic data that is based on an empirical network. These findings help us understand when mass-action models and network models are expected to provide similar results and identify reasons when they do not. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.15353v1-abstract-full').style.display = 'none'; document.getElementById('2408.15353v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14176">arXiv:2408.14176</a> <span> [<a href="https://arxiv.org/pdf/2408.14176">pdf</a>, <a href="https://arxiv.org/format/2408.14176">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SwiftBrush v2: Make Your One-step Diffusion Model Better Than Its Teacher </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Dao%2C+T">Trung Dao</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+T+H">Thuan Hoang Nguyen</a>, <a href="/search/?searchtype=author&query=Le%2C+T">Thanh Le</a>, <a href="/search/?searchtype=author&query=Vu%2C+D">Duc Vu</a>, <a href="/search/?searchtype=author&query=Nguyen%2C+K">Khoi Nguyen</a>, <a href="/search/?searchtype=author&query=Pham%2C+C">Cuong Pham</a>, <a href="/search/?searchtype=author&query=Tran%2C+A">Anh Tran</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14176v2-abstract-short" style="display: inline;"> In this paper, we aim to enhance the performance of SwiftBrush, a prominent one-step 
text-to-image diffusion model, to be competitive with its multi-step Stable Diffusion counterpart. Initially, we explore the quality-diversity trade-off between SwiftBrush and SD Turbo: the former excels in image diversity, while the latter excels in image quality. This observation motivates our proposed modificat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14176v2-abstract-full').style.display = 'inline'; document.getElementById('2408.14176v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14176v2-abstract-full" style="display: none;"> In this paper, we aim to enhance the performance of SwiftBrush, a prominent one-step text-to-image diffusion model, to be competitive with its multi-step Stable Diffusion counterpart. Initially, we explore the quality-diversity trade-off between SwiftBrush and SD Turbo: the former excels in image diversity, while the latter excels in image quality. This observation motivates our proposed modifications in the training methodology, including better weight initialization and efficient LoRA training. Moreover, our introduction of a novel clamped CLIP loss enhances image-text alignment and results in improved image quality. Remarkably, by combining the weights of models trained with efficient LoRA and full training, we achieve a new state-of-the-art one-step diffusion model, achieving an FID of 8.14 and surpassing all GAN-based and multi-step Stable Diffusion models. The project page is available at https://swiftbrushv2.github.io. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14176v2-abstract-full').style.display = 'none'; document.getElementById('2408.14176v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to ECCV'24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.12895">arXiv:2408.12895</a> <span> [<a href="https://arxiv.org/pdf/2408.12895">pdf</a>, <a href="https://arxiv.org/format/2408.12895">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Multimedia">cs.MM</span> </div> </div> <p class="title is-5 mathjax"> Ada2I: Enhancing Modality Balance for Multimodal Conversational Emotion Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/?searchtype=author&query=Nguyen%2C+C+T">Cam-Van Thi Nguyen</a>, <a href="/search/?searchtype=author&query=Le%2C+T">The-Son Le</a>, <a href="/search/?searchtype=author&query=Mai%2C+A">Anh-Tuan Mai</a>, <a href="/search/?searchtype=author&query=Le%2C+D">Duc-Trong Le</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.12895v1-abstract-short" style="display: inline;"> Multimodal Emotion Recognition in Conversations (ERC) is a typical multimodal learning task in exploiting various data modalities 
concurrently. Prior studies on effective multimodal ERC encounter challenges in addressing modality imbalances and optimizing learning across modalities. Dealing with these problems, we present a novel framework named Ada2I, which consists of two inseparable modules nam… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12895v1-abstract-full').style.display = 'inline'; document.getElementById('2408.12895v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.12895v1-abstract-full" style="display: none;"> Multimodal Emotion Recognition in Conversations (ERC) is a typical multimodal learning task in exploiting various data modalities concurrently. Prior studies on effective multimodal ERC encounter challenges in addressing modality imbalances and optimizing learning across modalities. Dealing with these problems, we present a novel framework named Ada2I, which consists of two inseparable modules namely Adaptive Feature Weighting (AFW) and Adaptive Modality Weighting (AMW) for feature-level and modality-level balancing respectively via leveraging both Inter- and Intra-modal interactions. Additionally, we introduce a refined disparity ratio as part of our training optimization strategy, a simple yet effective measure to assess the overall discrepancy of the model's learning process when handling multiple modalities simultaneously. Experimental results validate the effectiveness of Ada2I with state-of-the-art performance compared to baselines on three benchmark datasets, particularly in addressing modality imbalances. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.12895v1-abstract-full').style.display = 'none'; document.getElementById('2408.12895v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at ACM Multimedia 2024</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Le%2C+T&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Le%2C+T&start=0" class="pagination-link is-current" aria-label="Page 1" aria-current="page">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Le%2C+T&start=50" class="pagination-link " aria-label="Goto page 2">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Le%2C+T&start=100" class="pagination-link " aria-label="Goto page 3">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Le%2C+T&start=150" class="pagination-link " aria-label="Goto page 4">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Le%2C+T&start=200" class="pagination-link " aria-label="Goto page 5">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div 
class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a 
href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 
21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>