Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 1,341 results for author: <span class="mathjax">kim, D</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=kim%2C+D">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="kim, D"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=kim%2C+D&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="kim, D"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=kim%2C+D&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=kim%2C+D&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=kim%2C+D&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=kim%2C+D&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=kim%2C+D&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=kim%2C+D&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13909">arXiv:2502.13909</a> <span> [<a href="https://arxiv.org/pdf/2502.13909">pdf</a>, <a href="https://arxiv.org/format/2502.13909">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Lost in Sequence: Do Large Language Models Understand Sequential Recommendation? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+S">Sein Kim</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+H">Hongseok Kang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+K">Kibum Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jiwan Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Donghyun Kim</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+M">Minchul Yang</a>, <a href="/search/cs?searchtype=author&query=Oh%2C+K">Kwangjin Oh</a>, <a href="/search/cs?searchtype=author&query=McAuley%2C+J">Julian McAuley</a>, <a href="/search/cs?searchtype=author&query=Park%2C+C">Chanyoung Park</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13909v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have recently emerged as promising tools for recommendation thanks to their advanced textual understanding ability and context-awareness. Despite the current practice of training and evaluating LLM-based recommendation (LLM4Rec) models under a sequential recommendation scenario, we found that whether these models understand the sequential information inherent in users'… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13909v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13909v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13909v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have recently emerged as promising tools for recommendation thanks to their advanced textual understanding ability and context-awareness. Despite the current practice of training and evaluating LLM-based recommendation (LLM4Rec) models under a sequential recommendation scenario, we found that whether these models understand the sequential information inherent in users' item interaction sequences has been largely overlooked. In this paper, we first demonstrate through a series of experiments that existing LLM4Rec models do not fully capture sequential information both during training and inference. Then, we propose a simple yet effective LLM-based sequential recommender, called LLM-SRec, a method that enhances the integration of sequential information into LLMs by distilling the user representations extracted from a pre-trained CF-SRec model into LLMs. Our extensive experiments show that LLM-SRec enhances LLMs' ability to understand users' item interaction sequences, ultimately leading to improved recommendation performance. Furthermore, unlike existing LLM4Rec models that require fine-tuning of LLMs, LLM-SRec achieves state-of-the-art performance by training only a few lightweight MLPs, highlighting its practicality in real-world applications. Our code is available at https://github.com/Sein-Kim/LLM-SRec. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13909v1-abstract-full').style.display = 'none'; document.getElementById('2502.13909v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Under Review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.13449">arXiv:2502.13449</a> <span> [<a href="https://arxiv.org/pdf/2502.13449">pdf</a>, <a href="https://arxiv.org/format/2502.13449">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Chemical Physics">physics.chem-ph</span> </div> </div> <p class="title is-5 mathjax"> Mol-LLaMA: Towards General Understanding of Molecules in Large Molecular Language Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dongki Kim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+W">Wonbin Lee</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+S+J">Sung Ju Hwang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.13449v1-abstract-short" style="display: inline;"> Understanding molecules is key to understanding organisms and driving advances in drug discovery, requiring interdisciplinary knowledge across chemistry and biology. Although large molecular language models have achieved notable success in interpreting molecular structures, their instruction datasets are limited to the specific knowledge from task-oriented datasets and do not fully cover the funda… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13449v1-abstract-full').style.display = 'inline'; document.getElementById('2502.13449v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.13449v1-abstract-full" style="display: none;"> Understanding molecules is key to understanding organisms and driving advances in drug discovery, requiring interdisciplinary knowledge across chemistry and biology. Although large molecular language models have achieved notable success in interpreting molecular structures, their instruction datasets are limited to the specific knowledge from task-oriented datasets and do not fully cover the fundamental characteristics of molecules, hindering their abilities as general-purpose molecular assistants. To address this issue, we propose Mol-LLaMA, a large molecular language model that grasps the general knowledge centered on molecules via multi-modal instruction tuning. To this end, we design key data types that encompass the fundamental features of molecules, incorporating essential knowledge from molecular structures. In addition, to improve understanding of molecular features, we introduce a module that integrates complementary information from different molecular encoders, leveraging the distinct advantages of different molecular representations. Our experimental results demonstrate that Mol-LLaMA is capable of comprehending the general features of molecules and generating relevant responses to users' queries with detailed explanations, implying its potential as a general-purpose assistant for molecular analysis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.13449v1-abstract-full').style.display = 'none'; document.getElementById('2502.13449v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12523">arXiv:2502.12523</a> <span> [<a href="https://arxiv.org/pdf/2502.12523">pdf</a>, <a href="https://arxiv.org/format/2502.12523">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Cohesive Subgraph Discovery in Hypergraphs: A Locality-Driven Indexing Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+S">Song Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dahee Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Junghoon Kim</a>, <a href="/search/cs?searchtype=author&query=Jeong%2C+H+J">Hyun Ji Jeong</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jungeun Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12523v1-abstract-short" style="display: inline;"> Hypergraphs are increasingly employed to model complex, diverse relationships in modern networks, effectively capturing higher-order interactions. A critical challenge in this domain is the discovery of cohesive subgraphs, which provides valuable insights into hypergraph structures. However, selecting suitable parameters for this task remains unresolved. To address this, we propose an efficient in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12523v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12523v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12523v1-abstract-full" style="display: none;"> Hypergraphs are increasingly employed to model complex, diverse relationships in modern networks, effectively capturing higher-order interactions. A critical challenge in this domain is the discovery of cohesive subgraphs, which provides valuable insights into hypergraph structures. However, selecting suitable parameters for this task remains unresolved. To address this, we propose an efficient indexing framework designed for online retrieval of cohesive subgraphs. Our approach enables rapid identification of desired structures without requiring exhaustive graph traversals, thus ensuring scalability and practicality. This framework has broad applicability, supporting informed decision-making across various domains by offering a comprehensive view of network landscapes. Extensive experiments on real-world datasets demonstrate the effectiveness and efficiency of our proposed indexing technique. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12523v1-abstract-full').style.display = 'none'; document.getElementById('2502.12523v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12471">arXiv:2502.12471</a> <span> [<a href="https://arxiv.org/pdf/2502.12471">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Explainable AI-Driven Neural Activity Analysis in Parkinsonian Rats under Electrical Stimulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jibum Kim</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+H">Hanseul Choi</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+G">Gaeun Kim</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S">Sunggu Yang</a>, <a href="/search/cs?searchtype=author&query=Baeg%2C+E">Eunha Baeg</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Donggue Kim</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+S">Seongwon Jin</a>, <a href="/search/cs?searchtype=author&query=Byun%2C+S">Sangwon Byun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12471v1-abstract-short" style="display: inline;"> Parkinson's disease (PD) is a neurodegenerative disorder characterized by motor dysfunction and abnormal neural oscillations. These symptoms can be modulated through electrical stimulation. Traditional neural activity analysis in PD has typically relied on statistical methods, which often introduce bias owing to the need for expert-driven feature extraction. To address this limitation, we explore… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12471v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12471v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12471v1-abstract-full" style="display: none;"> Parkinson's disease (PD) is a neurodegenerative disorder characterized by motor dysfunction and abnormal neural oscillations. These symptoms can be modulated through electrical stimulation. Traditional neural activity analysis in PD has typically relied on statistical methods, which often introduce bias owing to the need for expert-driven feature extraction. To address this limitation, we explore an explainable artificial intelligence (XAI) approach to analyze neural activity in Parkinsonian rats receiving electrical stimulation. Electrocorticogram (ECoG) signals were collected before and after electrical stimulation using graphene-based electrodes that enable less-invasive monitoring and stimulation in PD. EEGNet, a convolutional neural network, classified these ECoG signals into pre- and post-stimulation states. We applied layer-wise relevance propagation, an XAI technique, to identify key neural inputs contributing to the model's decisions, incorporating the spatial electrode information matched to the cortex map. The XAI analysis highlighted area-specific importance in beta and gamma frequency bands, which could not be detected through mean comparison analyses relying on feature extraction. These findings demonstrate the potential of XAI in analyzing neural dynamics in neurodegenerative disorders such as PD, suggesting that the integration of graphene-based electrodes with advanced deep learning models offers a promising solution for real-time PD monitoring and therapy. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12471v1-abstract-full').style.display = 'none'; document.getElementById('2502.12471v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12463">arXiv:2502.12463</a> <span> [<a href="https://arxiv.org/pdf/2502.12463">pdf</a>, <a href="https://arxiv.org/format/2502.12463">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> RTPD: Penetration Depth calculation using Hardware accelerated Ray-Tracing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+Y">YoungWoo Kim</a>, <a href="/search/cs?searchtype=author&query=Kwon%2C+S">Sungmin Kwon</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Duksu Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12463v1-abstract-short" style="display: inline;"> Penetration depth calculation quantifies the extent of overlap between two objects and is crucial in fields like simulations, the metaverse, and robotics. Recognizing its significance, efforts have been made to accelerate this computation using parallel computing resources, such as CPUs and GPUs. Unlike traditional GPU cores, modern GPUs incorporate specialized ray-tracing cores (RT-cores) primari… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12463v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12463v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12463v1-abstract-full" style="display: none;"> Penetration depth calculation quantifies the extent of overlap between two objects and is crucial in fields like simulations, the metaverse, and robotics. Recognizing its significance, efforts have been made to accelerate this computation using parallel computing resources, such as CPUs and GPUs. Unlike traditional GPU cores, modern GPUs incorporate specialized ray-tracing cores (RT-cores) primarily used for rendering applications. We introduce a novel algorithm for penetration depth calculation that leverages RT-cores. Our approach includes a ray-tracing based algorithm for penetration surface extraction and another for calculating Hausdorff distance, optimizing the use of RT-cores. We tested our method across various generations of RTX GPUs with different benchmark scenes. The results demonstrated that our algorithm outperformed a state-of-the-art penetration depth calculation method and conventional GPU implementations by up to 37.66 and 5.33 times, respectively. These findings demonstrate the efficiency of our RT core-based method and suggest broad applicability for RT-cores in diverse computational tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12463v1-abstract-full').style.display = 'none'; document.getElementById('2502.12463v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 8 figures, under review for a journal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12344">arXiv:2502.12344</a> <span> [<a href="https://arxiv.org/pdf/2502.12344">pdf</a>, <a href="https://arxiv.org/format/2502.12344">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Hardware-Software Co-Design for Accelerating Transformer Inference Leveraging Compute-in-Memory </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+D+E">Dong Eun Kim</a>, <a href="/search/cs?searchtype=author&query=Sharma%2C+T">Tanvi Sharma</a>, <a href="/search/cs?searchtype=author&query=Roy%2C+K">Kaushik Roy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12344v1-abstract-short" style="display: inline;"> Transformers have become the backbone of neural network architecture for most machine learning applications. Their widespread use has resulted in multiple efforts on accelerating attention, the basic building block of transformers. This paper tackles the challenges associated with accelerating attention through a hardware-software co-design approach while leveraging compute-in-memory(CIM) architec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12344v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12344v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12344v1-abstract-full" style="display: none;"> Transformers have become the backbone of neural network architecture for most machine learning applications. Their widespread use has resulted in multiple efforts on accelerating attention, the basic building block of transformers. This paper tackles the challenges associated with accelerating attention through a hardware-software co-design approach while leveraging compute-in-memory(CIM) architecture. In particular, our energy- and area-efficient CIM based accelerator, named HASTILY, aims to accelerate softmax computation, an integral operation in attention, and minimize their high on-chip memory requirements that grows quadratically with input sequence length. Our architecture consists of novel CIM units called unified compute and lookup modules(UCLMs) that integrate both lookup and multiply-accumulate functionality within the same SRAM array, incurring minimal area overhead over standard CIM arrays. Designed in TSMC 65nm, UCLMs can be used to concurrently perform exponential and matrix-vector multiplication operations. Complementing the proposed architecture, HASTILY features a fine-grained pipelining strategy for scheduling both attention and feed-forward layers, to reduce the quadratic dependence on sequence length to linear dependence. Further, for fast softmax computation which involves computing the maxima and sum of exponential values, such operations are parallelized across multiple cores using reduce and gather strategy. We evaluate our proposed architecture using a compiler tailored towards attention computation and a standard cycle-level CIM simulator. Our evaluation shows end-to-end throughput(TOPS) improvement of 4.4x-9.8x and 1.7x-5.9x over Nvidia A40 GPU and baseline CIM hardware, respectively, for BERT models with INT-8 precision. Additionally, it shows gains of 16x-36x in energy-efficiency(TOPS/W) over A40 GPU and similar energy-efficiency as baseline CIM hardware. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12344v1-abstract-full').style.display = 'none'; document.getElementById('2502.12344v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12178">arXiv:2502.12178</a> <span> [<a href="https://arxiv.org/pdf/2502.12178">pdf</a>, <a href="https://arxiv.org/format/2502.12178">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> Direct Preference Optimization-Enhanced Multi-Guided Diffusion Model for Traffic Scenario Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+S">Seungjun Yu</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+K">Kisung Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Daejung Kim</a>, <a href="/search/cs?searchtype=author&query=Han%2C+H">Haewook Han</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Jinhan Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.12178v1-abstract-short" style="display: inline;"> Diffusion-based models are recognized for their effectiveness in using real-world driving data to generate realistic and diverse traffic scenarios. These models employ guided sampling to incorporate specific traffic preferences and enhance scenario realism. However, guiding the sampling process to conform to traffic rules and preferences can result in deviations from real-world traffic priors and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12178v1-abstract-full').style.display = 'inline'; document.getElementById('2502.12178v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.12178v1-abstract-full" style="display: none;"> Diffusion-based models are recognized for their effectiveness in using real-world driving data to generate realistic and diverse traffic scenarios. These models employ guided sampling to incorporate specific traffic preferences and enhance scenario realism. However, guiding the sampling process to conform to traffic rules and preferences can result in deviations from real-world traffic priors and potentially leading to unrealistic behaviors. To address this challenge, we introduce a multi-guided diffusion model that utilizes a novel training strategy to closely adhere to traffic priors, even when employing various combinations of guides. This model adopts a multi-task learning framework, enabling a single diffusion model to process various guide inputs. For increased guided sampling precision, our model is fine-tuned using the Direct Preference Optimization (DPO) algorithm. This algorithm optimizes preferences based on guide scores, effectively navigating the complexities and challenges associated with the expensive and often non-differentiable gradient calculations during the guided sampling fine-tuning process. Evaluated using the nuScenes dataset our model provides a strong baseline for balancing realism, diversity and controllability in the traffic scenario generation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.12178v1-abstract-full').style.display = 'none'; document.getElementById('2502.12178v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11386">arXiv:2502.11386</a> <span> [<a href="https://arxiv.org/pdf/2502.11386">pdf</a>, <a href="https://arxiv.org/format/2502.11386">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Intelligent Mobile AI-Generated Content Services via Interactive Prompt Engineering and Dynamic Service Provisioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yinqiu Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruichen Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xianbin Wang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11386v1-abstract-short" style="display: inline;"> Due to massive computational demands of large generative models, AI-Generated Content (AIGC) can organize collaborative Mobile AIGC Service Providers (MASPs) at network edges to provide ubiquitous and customized content generation for resource-constrained users. However, such a paradigm faces two significant challenges: 1) raw prompts (i.e., the task description from users) often lead to poor gene… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11386v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11386v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11386v1-abstract-full" style="display: none;"> Due to massive computational demands of large generative models, AI-Generated Content (AIGC) can organize collaborative Mobile AIGC Service Providers (MASPs) at network edges to provide ubiquitous and customized content generation for resource-constrained users. However, such a paradigm faces two significant challenges: 1) raw prompts (i.e., the task description from users) often lead to poor generation quality due to users' lack of experience with specific AIGC models, and 2) static service provisioning fails to efficiently utilize computational and communication resources given the heterogeneity of AIGC tasks. To address these challenges, we propose an intelligent mobile AIGC service scheme. Firstly, we develop an interactive prompt engineering mechanism that leverages a Large Language Model (LLM) to generate customized prompt corpora and employs Inverse Reinforcement Learning (IRL) for policy imitation through small-scale expert demonstrations. Secondly, we formulate a dynamic mobile AIGC service provisioning problem that jointly optimizes the number of inference trials and transmission power allocation. Then, we propose the Diffusion-Enhanced Deep Deterministic Policy Gradient (D3PG) algorithm to solve the problem. By incorporating the diffusion process into Deep Reinforcement Learning (DRL) architecture, the environment exploration capability can be improved, thus adapting to varying mobile AIGC scenarios. Extensive experimental results demonstrate that our prompt engineering approach improves single-round generation success probability by 6.3 times, while D3PG increases the user service experience by 67.8% compared to baseline DRL approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11386v1-abstract-full').style.display = 'none'; document.getElementById('2502.11386v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11360">arXiv:2502.11360</a> <span> [<a href="https://arxiv.org/pdf/2502.11360">pdf</a>, <a href="https://arxiv.org/format/2502.11360">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> GeoDANO: Geometric VLM with Domain Agnostic Vision Encoder </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cho%2C+S">Seunghyuk Cho</a>, <a href="/search/cs?searchtype=author&query=Qin%2C+Z">Zhenyue Qin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yang Liu</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+Y">Youngbin Choi</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+S">Seungbeom Lee</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dongwoo Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.11360v1-abstract-short" style="display: inline;"> We introduce GeoDANO, a geometric vision-language model (VLM) with a domain-agnostic vision encoder, for solving plane geometry problems. Although VLMs have been employed for solving geometry problems, their ability to recognize geometric features remains insufficiently analyzed. To address this gap, we propose a benchmark that evaluates the recognition of visual geometric features, including prim… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11360v1-abstract-full').style.display = 'inline'; document.getElementById('2502.11360v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.11360v1-abstract-full" style="display: none;"> We introduce GeoDANO, a geometric vision-language model (VLM) with a domain-agnostic vision encoder, for solving plane geometry problems. Although VLMs have been employed for solving geometry problems, their ability to recognize geometric features remains insufficiently analyzed. To address this gap, we propose a benchmark that evaluates the recognition of visual geometric features, including primitives such as dots and lines, and relations such as orthogonality. Our preliminary study shows that vision encoders often used in general-purpose VLMs, e.g., OpenCLIP, fail to detect these features and struggle to generalize across domains. We develop GeoCLIP, a CLIP based model trained on synthetic geometric diagram-caption pairs to overcome the limitation. Benchmark results show that GeoCLIP outperforms existing vision encoders in recognizing geometric features. We then propose our VLM, GeoDANO, which augments GeoCLIP with a domain adaptation strategy for unseen diagram styles. GeoDANO outperforms specialized methods for plane geometry problems and GPT-4o on MathVerse. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.11360v1-abstract-full').style.display = 'none'; document.getElementById('2502.11360v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 7 figures, 5 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10408">arXiv:2502.10408</a> <span> [<a href="https://arxiv.org/pdf/2502.10408">pdf</a>, <a href="https://arxiv.org/format/2502.10408">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Knowledge Tracing in Programming Education Integrating Students' Questions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+D">Doyoun Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+S">Suin Kim</a>, <a href="/search/cs?searchtype=author&query=Jo%2C+Y">Yojan Jo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10408v1-abstract-short" style="display: inline;"> Knowledge tracing (KT) in programming education presents unique challenges due to the complexity of coding tasks and the diverse methods students use to solve problems. Although students' questions often contain valuable signals about their understanding and misconceptions, traditional KT models often neglect to incorporate these questions as inputs to address these challenges. This paper introduc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10408v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10408v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10408v1-abstract-full" style="display: none;"> Knowledge tracing (KT) in programming education presents unique challenges due to the complexity of coding tasks and the diverse methods students use to solve problems. Although students' questions often contain valuable signals about their understanding and misconceptions, traditional KT models often neglect to incorporate these questions as inputs to address these challenges. This paper introduces SQKT (Students' Question-based Knowledge Tracing), a knowledge tracing model that leverages students' questions and automatically extracted skill information to enhance the accuracy of predicting students' performance on subsequent problems in programming education. Our method creates semantically rich embeddings that capture not only the surface-level content of the questions but also the student's mastery level and conceptual understanding. Experimental results demonstrate SQKT's superior performance in predicting student completion across various Python programming courses of differing difficulty levels. In in-domain experiments, SQKT achieved a 33.1\% absolute improvement in AUC compared to baseline models. The model also exhibited robust generalization capabilities in cross-domain settings, effectively addressing data scarcity issues in advanced programming courses. SQKT can be used to tailor educational content to individual learning needs and design adaptive learning systems in computer science education. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10408v1-abstract-full').style.display = 'none'; document.getElementById('2502.10408v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09648">arXiv:2502.09648</a> <span> [<a href="https://arxiv.org/pdf/2502.09648">pdf</a>, <a href="https://arxiv.org/format/2502.09648">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> UKTA: Unified Korean Text Analyzer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ahn%2C+S">Seokho Ahn</a>, <a href="/search/cs?searchtype=author&query=Park%2C+J">Junhyung Park</a>, <a href="/search/cs?searchtype=author&query=Go%2C+G">Ganghee Go</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+C">Chulhui Kim</a>, <a href="/search/cs?searchtype=author&query=Jung%2C+J">Jiho Jung</a>, <a href="/search/cs?searchtype=author&query=Shin%2C+M+S">Myung Sun Shin</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Do-Guk Kim</a>, <a href="/search/cs?searchtype=author&query=Seo%2C+Y">Young-Duk Seo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09648v1-abstract-short" style="display: inline;"> Evaluating writing quality is complex and time-consuming often delaying feedback to learners. While automated writing evaluation tools are effective for English, Korean automated writing evaluation tools face challenges due to their inability to address multi-view analysis, error propagation, and evaluation explainability. To overcome these challenges, we introduce UKTA (Unified Korean Text Analyz… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09648v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09648v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09648v1-abstract-full" style="display: none;"> Evaluating writing quality is complex and time-consuming often delaying feedback to learners. While automated writing evaluation tools are effective for English, Korean automated writing evaluation tools face challenges due to their inability to address multi-view analysis, error propagation, and evaluation explainability. To overcome these challenges, we introduce UKTA (Unified Korean Text Analyzer), a comprehensive Korea text analysis and writing evaluation system. UKTA provides accurate low-level morpheme analysis, key lexical features for mid-level explainability, and transparent high-level rubric-based writing scores. Our approach enhances accuracy and quadratic weighted kappa over existing baseline, positioning UKTA as a leading multi-perspective tool for Korean text analysis and writing evaluation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09648v1-abstract-full').style.display = 'none'; document.getElementById('2502.09648v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by SAC 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09549">arXiv:2502.09549</a> <span> [<a href="https://arxiv.org/pdf/2502.09549">pdf</a>, <a href="https://arxiv.org/format/2502.09549">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Registration, Detection, and Deregistration: Analyzing DNS Abuse for Phishing Attacks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lim%2C+K">Kyungchan Lim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+K">Kiho Lee</a>, <a href="/search/cs?searchtype=author&query=Sommese%2C+R">Raffaele Sommese</a>, <a href="/search/cs?searchtype=author&query=Jonker%2C+M">Mattis Jonker</a>, <a href="/search/cs?searchtype=author&query=Mok%2C+R">Ricky Mok</a>, <a href="/search/cs?searchtype=author&query=claffy%2C+k">kc claffy</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Doowon Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09549v1-abstract-short" style="display: inline;"> Phishing continues to pose a significant cybersecurity threat. While blocklists currently serve as a primary defense, due to their reactive, passive nature, these delayed responses leave phishing websites operational long enough to harm potential victims. It is essential to address this fundamental challenge at the root, particularly in phishing domains. Domain registration presents a crucial inte… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09549v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09549v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09549v1-abstract-full" style="display: none;"> Phishing continues to pose a significant cybersecurity threat. While blocklists currently serve as a primary defense, due to their reactive, passive nature, these delayed responses leave phishing websites operational long enough to harm potential victims. It is essential to address this fundamental challenge at the root, particularly in phishing domains. Domain registration presents a crucial intervention point, as domains serve as the primary gateway between users and websites. We conduct a comprehensive longitudinal analysis of 690,502 unique phishing domains, spanning a 39 month period, to examine their characteristics and behavioral patterns throughout their lifecycle-from initial registration to detection and eventual deregistration. We find that 66.1% of the domains in our dataset are maliciously registered, leveraging cost-effective TLDs and targeting brands by mimicking their domain names under alternative TLDs (e.g., .top and .tk) instead of the TLDs under which the brand domains are registered (e.g., .com and .ru). We also observe minimal improvements in detection speed for maliciously registered domains compared to compromised domains. Detection times vary widely across blocklists, and phishing domains remain accessible for an average of 11.5 days after detection, prolonging their potential impact. Our systematic investigation uncovers key patterns from registration through detection to deregistration, which could be leveraged to enhance anti-phishing active defenses at the DNS level. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09549v1-abstract-full').style.display = 'none'; document.getElementById('2502.09549v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09007">arXiv:2502.09007</a> <span> [<a href="https://arxiv.org/pdf/2502.09007">pdf</a>, <a href="https://arxiv.org/format/2502.09007">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> RED: Energy Optimization Framework for eDRAM-based PIM with Reconfigurable Voltage Swing and Retention-aware Scheduling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jae-Young Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Donghyuk Kim</a>, <a href="/search/cs?searchtype=author&query=Yoo%2C+S">Seungjae Yoo</a>, <a href="/search/cs?searchtype=author&query=Yoo%2C+S">Sungyeob Yoo</a>, <a href="/search/cs?searchtype=author&query=Suh%2C+T">Teokkyu Suh</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Joo-Young Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09007v1-abstract-short" style="display: inline;"> In the era of artificial intelligence (AI), Transformer demonstrates its performance across various applications. The excessive amount of parameters incurs high latency and energy overhead when processed in the von Neumann architecture. Processing-in-memory (PIM) has shown the potential in accelerating data-intensive applications by reducing data movement. While previous works mainly optimize the… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09007v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09007v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09007v1-abstract-full" style="display: none;"> In the era of artificial intelligence (AI), Transformer demonstrates its performance across various applications. The excessive amount of parameters incurs high latency and energy overhead when processed in the von Neumann architecture. Processing-in-memory (PIM) has shown the potential in accelerating data-intensive applications by reducing data movement. While previous works mainly optimize the computational part of PIM to enhance energy efficiency, the importance of memory design, which consumes the most power in PIM, has been rather neglected. In this work, we present RED, an energy optimization framework for eDRAM-based PIM. We first analyze the PIM operations in eDRAM, obtaining two key observations: 1) memory access energy consumption is predominant in PIM, and 2) read bitline (RBL) voltage swing, sense amplifier power, and retention time are in trade-off relations. Leveraging them, we propose a novel reconfigurable eDRAM and retention-aware scheduling that minimizes the runtime energy consumption of the eDRAM macro. The framework pinpoints the optimal operating point by pre-estimating energy consumption across all possible tiling schemes and memory operations. Then, the reconfigurable eDRAM controls the RBL voltage swing at runtime according to the scheduling, optimizing the memory access power. Moreover, RED employs refresh skipping and sense amplifier power gating to mitigate the energy consumption overhead coming from the trade-off relation. Finally, the RED framework achieves up to 3.05x higher energy efficiency than the prior SRAM-based PIM, reducing the energy consumption of eDRAM macro up to 74.88% with reconfigurable eDRAM and optimization schemes, requiring only 3.5% area and 0.77% energy overhead for scheduling. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09007v1-abstract-full').style.display = 'none'; document.getElementById('2502.09007v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 14 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08432">arXiv:2502.08432</a> <span> [<a href="https://arxiv.org/pdf/2502.08432">pdf</a>, <a href="https://arxiv.org/format/2502.08432">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Closer through commonality: Enhancing hypergraph contrastive learning with shared groups </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Roh%2C+D">Daeyoung Roh</a>, <a href="/search/cs?searchtype=author&query=Han%2C+D">Donghee Han</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Daehee Kim</a>, <a href="/search/cs?searchtype=author&query=Han%2C+K">Keejun Han</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+M">Mun Yi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08432v1-abstract-short" style="display: inline;"> Hypergraphs provide a superior modeling framework for representing complex multidimensional relationships in the context of real-world interactions that often occur in groups, overcoming the limitations of traditional homogeneous graphs. However, there have been few studies on hypergraphbased contrastive learning, and existing graph-based contrastive learning methods have not been able to fully ex… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08432v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08432v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08432v1-abstract-full" style="display: none;"> Hypergraphs provide a superior modeling framework for representing complex multidimensional relationships in the context of real-world interactions that often occur in groups, overcoming the limitations of traditional homogeneous graphs. However, there have been few studies on hypergraphbased contrastive learning, and existing graph-based contrastive learning methods have not been able to fully exploit the highorder correlation information in hypergraphs. Here, we propose a Hypergraph Fine-grained contrastive learning (HyFi) method designed to exploit the complex high-dimensional information inherent in hypergraphs. While avoiding traditional graph augmentation methods that corrupt the hypergraph topology, the proposed method provides a simple and efficient learning augmentation function by adding noise to node features. Furthermore, we expands beyond the traditional dichotomous relationship between positive and negative samples in contrastive learning by introducing a new relationship of weak positives. It demonstrates the importance of fine-graining positive samples in contrastive learning. Therefore, HyFi is able to produce highquality embeddings, and outperforms both supervised and unsupervised baselines in average rank on node classification across 10 datasets. Our approach effectively exploits high-dimensional hypergraph information, shows significant improvement over existing graph-based contrastive learning methods, and is efficient in terms of training speed and GPU memory cost. The source code is available at https://github.com/Noverse0/HyFi.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08432v1-abstract-full').style.display = 'none'; document.getElementById('2502.08432v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11page, 5 figures, 6 tables, 2024 IEEE International Conference on Big Data</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08011">arXiv:2502.08011</a> <span> [<a href="https://arxiv.org/pdf/2502.08011">pdf</a>, <a href="https://arxiv.org/format/2502.08011">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Training-Free Safe Denoisers for Safe Use of Diffusion Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+M">Mingyu Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dongjun Kim</a>, <a href="/search/cs?searchtype=author&query=Yusuf%2C+A">Amman Yusuf</a>, <a href="/search/cs?searchtype=author&query=Ermon%2C+S">Stefano Ermon</a>, <a href="/search/cs?searchtype=author&query=Park%2C+M+J">Mi Jung Park</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08011v2-abstract-short" style="display: inline;"> There is growing concern over the safety of powerful diffusion models (DMs), as they are often misused to produce inappropriate, not-safe-for-work (NSFW) content or generate copyrighted material or data of individuals who wish to be forgotten. Many existing methods tackle these issues by heavily relying on text-based negative prompts or extensively retraining DMs to eliminate certain features or s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08011v2-abstract-full').style.display = 'inline'; document.getElementById('2502.08011v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08011v2-abstract-full" style="display: none;"> There is growing concern over the safety of powerful diffusion models (DMs), as they are often misused to produce inappropriate, not-safe-for-work (NSFW) content or generate copyrighted material or data of individuals who wish to be forgotten. Many existing methods tackle these issues by heavily relying on text-based negative prompts or extensively retraining DMs to eliminate certain features or samples. In this paper, we take a radically different approach, directly modifying the sampling trajectory by leveraging a negation set (e.g., unsafe images, copyrighted data, or datapoints needed to be excluded) to avoid specific regions of data distribution, without needing to retrain or fine-tune DMs. We formally derive the relationship between the expected denoised samples that are safe and those that are not safe, leading to our $\textit{safe}$ denoiser which ensures its final samples are away from the area to be negated. Inspired by the derivation, we develop a practical algorithm that successfully produces high-quality samples while avoiding negation areas of the data distribution in text-conditional, class-conditional, and unconditional image generation scenarios. These results hint at the great potential of our training-free safe denoiser for using DMs more safely. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08011v2-abstract-full').style.display = 'none'; document.getElementById('2502.08011v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Preprint</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06251">arXiv:2502.06251</a> <span> [<a href="https://arxiv.org/pdf/2502.06251">pdf</a>, <a href="https://arxiv.org/format/2502.06251">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3708557.3716334">10.1145/3708557.3716334 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Amplifying Minority Voices: AI-Mediated Devil's Advocate System for Inclusive Group Decision-Making </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+S">Soohwan Lee</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+M">Mingyu Kim</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+S">Seoyeong Hwang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dajung Kim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+K">Kyungho Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06251v1-abstract-short" style="display: inline;"> Group decision-making often benefits from diverse perspectives, yet power imbalances and social influence can stifle minority opinions and compromise outcomes. This prequel introduces an AI-mediated communication system that leverages the Large Language Model to serve as a devil's advocate, representing underrepresented viewpoints without exposing minority members' identities. Rooted in persuasive… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06251v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06251v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06251v1-abstract-full" style="display: none;"> Group decision-making often benefits from diverse perspectives, yet power imbalances and social influence can stifle minority opinions and compromise outcomes. This prequel introduces an AI-mediated communication system that leverages the Large Language Model to serve as a devil's advocate, representing underrepresented viewpoints without exposing minority members' identities. Rooted in persuasive communication strategies and anonymity, the system aims to improve psychological safety and foster more inclusive decision-making. Our multi-agent architecture, which consists of a summary agent, conversation agent, AI duplicate checker, and paraphrase agent, encourages the group's critical thinking while reducing repetitive outputs. We acknowledge that reliance on text-based communication and fixed intervention timings may limit adaptability, indicating pathways for refinement. By focusing on the representation of minority viewpoints anonymously in power-imbalanced settings, this approach highlights how AI-driven methods can evolve to support more divergent and inclusive group decision-making. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06251v1-abstract-full').style.display = 'none'; document.getElementById('2502.06251v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 3 figures. This is a preprint version of the poster accepted to ACM Conference on Intelligent User Interfaces(IUI'25)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04471">arXiv:2502.04471</a> <span> [<a href="https://arxiv.org/pdf/2502.04471">pdf</a>, <a href="https://arxiv.org/format/2502.04471">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Identifying Flaky Tests in Quantum Code: A Machine Learning Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kaur%2C+K">Khushdeep Kaur</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dongchan Kim</a>, <a href="/search/cs?searchtype=author&query=Jamshidi%2C+A">Ainaz Jamshidi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04471v1-abstract-short" style="display: inline;"> Testing and debugging quantum software pose significant challenges due to the inherent complexities of quantum mechanics, such as superposition and entanglement. One challenge is indeterminacy, a fundamental characteristic of quantum systems, which increases the likelihood of flaky tests in quantum programs. To the best of our knowledge, there is a lack of comprehensive studies on quantum flakines… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04471v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04471v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04471v1-abstract-full" style="display: none;"> Testing and debugging quantum software pose significant challenges due to the inherent complexities of quantum mechanics, such as superposition and entanglement. One challenge is indeterminacy, a fundamental characteristic of quantum systems, which increases the likelihood of flaky tests in quantum programs. To the best of our knowledge, there is a lack of comprehensive studies on quantum flakiness in the existing literature. In this paper, we present a novel machine learning platform that leverages multiple machine learning models to automatically detect flaky tests in quantum programs. Our evaluation shows that the extreme gradient boosting and decision tree-based models outperform other models (i.e., random forest, k-nearest neighbors, and support vector machine), achieving the highest F1 score and Matthews Correlation Coefficient in a balanced dataset and an imbalanced dataset, respectively. Furthermore, we expand the currently limited dataset for researchers interested in quantum flaky tests. In the future, we plan to explore the development of unsupervised learning techniques to detect and classify quantum flaky tests more effectively. These advancements aim to improve the reliability and robustness of quantum software testing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04471v1-abstract-full').style.display = 'none'; document.getElementById('2502.04471v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 1 figure, accepted by Q-SANER 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01357">arXiv:2502.01357</a> <span> [<a href="https://arxiv.org/pdf/2502.01357">pdf</a>, <a href="https://arxiv.org/format/2502.01357">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Bayesian Approximation-Based Trajectory Prediction and Tracking with 4D Radar </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dong-In Kim</a>, <a href="/search/cs?searchtype=author&query=Paek%2C+D">Dong-Hee Paek</a>, <a href="/search/cs?searchtype=author&query=Song%2C+S">Seung-Hyun Song</a>, <a href="/search/cs?searchtype=author&query=Kong%2C+S">Seung-Hyun Kong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01357v1-abstract-short" style="display: inline;"> Accurate 3D multi-object tracking (MOT) is vital for autonomous vehicles, yet LiDAR and camera-based methods degrade in adverse weather. Meanwhile, Radar-based solutions remain robust but often suffer from limited vertical resolution and simplistic motion models. Existing Kalman filter-based approaches also rely on fixed noise covariance, hampering adaptability when objects make sudden maneuvers.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01357v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01357v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01357v1-abstract-full" style="display: none;"> Accurate 3D multi-object tracking (MOT) is vital for autonomous vehicles, yet LiDAR and camera-based methods degrade in adverse weather. Meanwhile, Radar-based solutions remain robust but often suffer from limited vertical resolution and simplistic motion models. Existing Kalman filter-based approaches also rely on fixed noise covariance, hampering adaptability when objects make sudden maneuvers. We propose Bayes-4DRTrack, a 4D Radar-based MOT framework that adopts a transformer-based motion prediction network to capture nonlinear motion dynamics and employs Bayesian approximation in both detection and prediction steps. Moreover, our two-stage data association leverages Doppler measurements to better distinguish closely spaced targets. Evaluated on the K-Radar dataset (including adverse weather scenarios), Bayes-4DRTrack demonstrates a 5.7% gain in Average Multi-Object Tracking Accuracy (AMOTA) over methods with traditional motion models and fixed noise covariance. These results showcase enhanced robustness and accuracy in demanding, real-world conditions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01357v1-abstract-full').style.display = 'none'; document.getElementById('2502.01357v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01092">arXiv:2502.01092</a> <span> [<a href="https://arxiv.org/pdf/2502.01092">pdf</a>, <a href="https://arxiv.org/format/2502.01092">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Systems and Control">eess.SY</span> </div> </div> <p class="title is-5 mathjax"> Enhancing Feature Tracking Reliability for Visual Navigation using Real-Time Safety Filter </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dabin Kim</a>, <a href="/search/cs?searchtype=author&query=Jang%2C+I">Inkyu Jang</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Y">Youngsoo Han</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+S">Sunwoo Hwang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+H+J">H. Jin Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01092v1-abstract-short" style="display: inline;"> Vision sensors are extensively used for localizing a robot's pose, particularly in environments where global localization tools such as GPS or motion capture systems are unavailable. In many visual navigation systems, localization is achieved by detecting and tracking visual features or landmarks, which provide information about the sensor's relative pose. For reliable feature tracking and accurat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01092v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01092v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01092v1-abstract-full" style="display: none;"> Vision sensors are extensively used for localizing a robot's pose, particularly in environments where global localization tools such as GPS or motion capture systems are unavailable. In many visual navigation systems, localization is achieved by detecting and tracking visual features or landmarks, which provide information about the sensor's relative pose. For reliable feature tracking and accurate pose estimation, it is crucial to maintain visibility of a sufficient number of features. This requirement can sometimes conflict with the robot's overall task objective. In this paper, we approach it as a constrained control problem. By leveraging the invariance properties of visibility constraints within the robot's kinematic model, we propose a real-time safety filter based on quadratic programming. This filter takes a reference velocity command as input and produces a modified velocity that minimally deviates from the reference while ensuring the information score from the currently visible features remains above a user-specified threshold. Numerical simulations demonstrate that the proposed safety filter preserves the invariance condition and ensures the visibility of more features than the required minimum. We also validated its real-world performance by integrating it into a visual simultaneous localization and mapping (SLAM) algorithm, where it maintained high estimation quality in challenging environments, outperforming a simple tracking controller. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01092v1-abstract-full').style.display = 'none'; document.getElementById('2502.01092v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 6 figures, Accepted to 2025 IEEE International Conference on Robotics & Automation (ICRA 2025)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00027">arXiv:2502.00027</a> <span> [<a href="https://arxiv.org/pdf/2502.00027">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neural and Evolutionary Computing">cs.NE</span> </div> </div> <p class="title is-5 mathjax"> Analysis of a Memcapacitor-Based for Neural Network Accelerator Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Singh%2C+A">Ankur Singh</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dowon Kim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+B">Byung-Geun Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00027v1-abstract-short" style="display: inline;"> Data-intensive computing tasks, such as training neural networks, are crucial for artificial intelligence applications but often come with high energy demands. One promising solution is to develop specialized hardware that directly maps neural networks, utilizing arrays of memristive devices to perform parallel multiply-accumulate operations. In our research, we introduce a novel CMOS-based memcap… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00027v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00027v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00027v1-abstract-full" style="display: none;"> Data-intensive computing tasks, such as training neural networks, are crucial for artificial intelligence applications but often come with high energy demands. One promising solution is to develop specialized hardware that directly maps neural networks, utilizing arrays of memristive devices to perform parallel multiply-accumulate operations. In our research, we introduce a novel CMOS-based memcapacitor circuit that is validated using the cadence tool. Additionally, we developed the device in Python to facilitate the design of a memcapacitive-based accelerator. Our proposed framework employs a crossbar array of memcapacitor devices to train a neural network capable of digit classification and CIFAR dataset recognition. We tested the non-ideal characteristics of the constructed memcapacitor-based neural network. The system achieved an impressive 98.4% training accuracy in digit recognition and 94.4% training accuracy in CIFAR recognition, highlighting its effectiveness. This study demonstrates the potential of memcapacitor-based neural network systems in handling classification tasks and sets the stage for further advancements in neuromorphic computing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00027v1-abstract-full').style.display = 'none'; document.getElementById('2502.00027v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19066">arXiv:2501.19066</a> <span> [<a href="https://arxiv.org/pdf/2501.19066">pdf</a>, <a href="https://arxiv.org/format/2501.19066">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Concept Steerers: Leveraging K-Sparse Autoencoders for Controllable Generations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dahye Kim</a>, <a href="/search/cs?searchtype=author&query=Ghadiyaram%2C+D">Deepti Ghadiyaram</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19066v1-abstract-short" style="display: inline;"> Despite the remarkable progress in text-to-image generative models, they are prone to adversarial attacks and inadvertently generate unsafe, unethical content. Existing approaches often rely on fine-tuning models to remove specific concepts, which is computationally expensive, lack scalability, and/or compromise generation quality. In this work, we propose a novel framework leveraging k-sparse aut… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19066v1-abstract-full').style.display = 'inline'; document.getElementById('2501.19066v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19066v1-abstract-full" style="display: none;"> Despite the remarkable progress in text-to-image generative models, they are prone to adversarial attacks and inadvertently generate unsafe, unethical content. Existing approaches often rely on fine-tuning models to remove specific concepts, which is computationally expensive, lack scalability, and/or compromise generation quality. In this work, we propose a novel framework leveraging k-sparse autoencoders (k-SAEs) to enable efficient and interpretable concept manipulation in diffusion models. Specifically, we first identify interpretable monosemantic concepts in the latent space of text embeddings and leverage them to precisely steer the generation away or towards a given concept (e.g., nudity) or to introduce a new concept (e.g., photographic style). Through extensive experiments, we demonstrate that our approach is very simple, requires no retraining of the base model nor LoRA adapters, does not compromise the generation quality, and is robust to adversarial prompt manipulations. Our method yields an improvement of $\mathbf{20.01\%}$ in unsafe concept removal, is effective in style manipulation, and is $\mathbf{\sim5}$x faster than current state-of-the-art. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19066v1-abstract-full').style.display = 'none'; document.getElementById('2501.19066v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">15 pages, 16 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.15076">arXiv:2501.15076</a> <span> [<a href="https://arxiv.org/pdf/2501.15076">pdf</a>, <a href="https://arxiv.org/format/2501.15076">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Cryptanalysis via Machine Learning Based Information Theoretic Metrics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+B+D">Benjamin D. Kim</a>, <a href="/search/cs?searchtype=author&query=Vasudevan%2C+V+A">Vipindev Adat Vasudevan</a>, <a href="/search/cs?searchtype=author&query=D%27Oliveira%2C+R+G+L">Rafael G. L. D'Oliveira</a>, <a href="/search/cs?searchtype=author&query=Cohen%2C+A">Alejandro Cohen</a>, <a href="/search/cs?searchtype=author&query=Stahlbuhk%2C+T">Thomas Stahlbuhk</a>, <a href="/search/cs?searchtype=author&query=M%C3%A9dard%2C+M">Muriel M茅dard</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.15076v1-abstract-short" style="display: inline;"> The fields of machine learning (ML) and cryptanalysis share an interestingly common objective of creating a function, based on a given set of inputs and outputs. However, the approaches and methods in doing so vary vastly between the two fields. In this paper, we explore integrating the knowledge from the ML domain to provide empirical evaluations of cryptosystems. Particularly, we utilize informa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15076v1-abstract-full').style.display = 'inline'; document.getElementById('2501.15076v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.15076v1-abstract-full" style="display: none;"> The fields of machine learning (ML) and cryptanalysis share an interestingly common objective of creating a function, based on a given set of inputs and outputs. However, the approaches and methods in doing so vary vastly between the two fields. In this paper, we explore integrating the knowledge from the ML domain to provide empirical evaluations of cryptosystems. Particularly, we utilize information theoretic metrics to perform ML-based distribution estimation. We propose two novel applications of ML algorithms that can be applied in a known plaintext setting to perform cryptanalysis on any cryptosystem. We use mutual information neural estimation to calculate a cryptosystem's mutual information leakage, and a binary cross entropy classification to model an indistinguishability under chosen plaintext attack (CPA). These algorithms can be readily applied in an audit setting to evaluate the robustness of a cryptosystem and the results can provide a useful empirical bound. We evaluate the efficacy of our methodologies by empirically analyzing several encryption schemes. Furthermore, we extend the analysis to novel network coding-based cryptosystems and provide other use cases for our algorithms. We show that our classification model correctly identifies the encryption schemes that are not IND-CPA secure, such as DES, RSA, and AES ECB, with high accuracy. It also identifies the faults in CPA-secure cryptosystems with faulty parameters, such a reduced counter version of AES-CTR. We also conclude that with our algorithms, in most cases a smaller-sized neural network using less computing power can identify vulnerabilities in cryptosystems, providing a quick check of the sanity of the cryptosystem and help to decide whether to spend more resources to deploy larger networks that are able to break the cryptosystem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.15076v1-abstract-full').style.display = 'none'; document.getElementById('2501.15076v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14328">arXiv:2501.14328</a> <span> [<a href="https://arxiv.org/pdf/2501.14328">pdf</a>, <a href="https://arxiv.org/format/2501.14328">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Securing DRAM at Scale: ARFM-Driven Row Hammer Defense with Unveiling the Threat of Short tRC Patterns </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Joo%2C+N">Nogeun Joo</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Donghyuk Kim</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+H">Hyunjun Cho</a>, <a href="/search/cs?searchtype=author&query=Noh%2C+J">Junseok Noh</a>, <a href="/search/cs?searchtype=author&query=Jung%2C+D">Dongha Jung</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Joo-Young Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14328v1-abstract-short" style="display: inline;"> To address the issue of powerful row hammer (RH) attacks, our study involved an extensive analysis of the prevalent attack patterns in the field. We discovered a strong correlation between the timing and density of the active-to-active command period, ${tRC}$, and the likelihood of RH attacks. In this paper, we introduce MARC, an innovative ARFM-driven RH mitigation IP that significantly reinforce… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14328v1-abstract-full').style.display = 'inline'; document.getElementById('2501.14328v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14328v1-abstract-full" style="display: none;"> To address the issue of powerful row hammer (RH) attacks, our study involved an extensive analysis of the prevalent attack patterns in the field. We discovered a strong correlation between the timing and density of the active-to-active command period, ${tRC}$, and the likelihood of RH attacks. In this paper, we introduce MARC, an innovative ARFM-driven RH mitigation IP that significantly reinforces existing RH mitigation IPs. MARC dynamically adjusts the frequency of RFM in response to the severity of the RH attack environment, offering a tailored security solution that not only detects the threats but also adapts to varying threat levels. MARC's detection mechanism has demonstrated remarkable efficiency, identifying over 99\% of attack patterns. Moreover, MARC is designed as a compact hardware module, facilitating tight integration either on the memory controller-side or DRAM-side within the memory system. It only occupies a negligible hardware area of 3363~\textit{$渭m^2$}. By activating ARFM based on MARC's detection, the additional energy overhead is also negligible in normal workloads. We conduct experiments to compare the highest row count throughout the patterns, defined as max exposure, between the vanilla RH mitigation IPs and the MARC-enhanced versions of the same IPs, focusing on both DRAM-side and memory controller-side. On the DRAM-side, MARC + probabilistic scheme and MARC + counter-based tracking scheme achieve 8.1$\times$ and 1.5$\times$ improvement in max exposure ratio compared to the vanilla IPs, respectively. On the memory controller-side, the MARC + PARA and MARC + Graphene achieve 50$\times$ and 5.7$\times$ improvement in max exposure ratio compared to the vanilla IPs, respectively. MARC ensures optimal security without sacrificing system performance, making MARC a pioneering solution in the realm of RH attack mitigation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14328v1-abstract-full').style.display = 'none'; document.getElementById('2501.14328v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 19 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.14249">arXiv:2501.14249</a> <span> [<a href="https://arxiv.org/pdf/2501.14249">pdf</a>, <a href="https://arxiv.org/format/2501.14249">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Humanity's Last Exam </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Phan%2C+L">Long Phan</a>, <a href="/search/cs?searchtype=author&query=Gatti%2C+A">Alice Gatti</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Z">Ziwen Han</a>, <a href="/search/cs?searchtype=author&query=Li%2C+N">Nathaniel Li</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Josephina Hu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hugh Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C+B+C">Chen Bo Calvin Zhang</a>, <a href="/search/cs?searchtype=author&query=Shaaban%2C+M">Mohamed Shaaban</a>, <a href="/search/cs?searchtype=author&query=Ling%2C+J">John Ling</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+S">Sean Shi</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+M">Michael Choi</a>, <a href="/search/cs?searchtype=author&query=Agrawal%2C+A">Anish Agrawal</a>, <a href="/search/cs?searchtype=author&query=Chopra%2C+A">Arnav Chopra</a>, <a href="/search/cs?searchtype=author&query=Khoja%2C+A">Adam Khoja</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+R">Ryan Kim</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+R">Richard Ren</a>, <a href="/search/cs?searchtype=author&query=Hausenloy%2C+J">Jason Hausenloy</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+O">Oliver Zhang</a>, <a href="/search/cs?searchtype=author&query=Mazeika%2C+M">Mantas Mazeika</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+T">Tung Nguyen</a>, <a href="/search/cs?searchtype=author&query=Anderson%2C+D">Daron Anderson</a>, <a href="/search/cs?searchtype=author&query=Shah%2C+I+A">Imad Ali Shah</a>, <a href="/search/cs?searchtype=author&query=Doroshenko%2C+M">Mikhail Doroshenko</a>, <a href="/search/cs?searchtype=author&query=Stokes%2C+A+C">Alun Cennyth Stokes</a>, <a href="/search/cs?searchtype=author&query=Mahmood%2C+M">Mobeen Mahmood</a> , et al. (710 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.14249v4-abstract-short" style="display: inline;"> Benchmarks are important tools for tracking the rapid advancements in large language model (LLM) capabilities. However, benchmarks are not keeping pace in difficulty: LLMs now achieve over 90\% accuracy on popular benchmarks like MMLU, limiting informed measurement of state-of-the-art LLM capabilities. In response, we introduce Humanity's Last Exam (HLE), a multi-modal benchmark at the frontier of… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14249v4-abstract-full').style.display = 'inline'; document.getElementById('2501.14249v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.14249v4-abstract-full" style="display: none;"> Benchmarks are important tools for tracking the rapid advancements in large language model (LLM) capabilities. However, benchmarks are not keeping pace in difficulty: LLMs now achieve over 90\% accuracy on popular benchmarks like MMLU, limiting informed measurement of state-of-the-art LLM capabilities. In response, we introduce Humanity's Last Exam (HLE), a multi-modal benchmark at the frontier of human knowledge, designed to be the final closed-ended academic benchmark of its kind with broad subject coverage. HLE consists of 3,000 questions across dozens of subjects, including mathematics, humanities, and the natural sciences. HLE is developed globally by subject-matter experts and consists of multiple-choice and short-answer questions suitable for automated grading. Each question has a known solution that is unambiguous and easily verifiable, but cannot be quickly answered via internet retrieval. State-of-the-art LLMs demonstrate low accuracy and calibration on HLE, highlighting a significant gap between current LLM capabilities and the expert human frontier on closed-ended academic questions. To inform research and policymaking upon a clear understanding of model capabilities, we publicly release HLE at https://lastexam.ai. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.14249v4-abstract-full').style.display = 'none'; document.getElementById('2501.14249v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">27 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.12189">arXiv:2501.12189</a> <span> [<a href="https://arxiv.org/pdf/2501.12189">pdf</a>, <a href="https://arxiv.org/format/2501.12189">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> MirrorCBO: A consensus-based optimization method in the spirit of mirror descent </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bungert%2C+L">Leon Bungert</a>, <a href="/search/cs?searchtype=author&query=Hoffmann%2C+F">Franca Hoffmann</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+Y">Doh Yeon Kim</a>, <a href="/search/cs?searchtype=author&query=Roith%2C+T">Tim Roith</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.12189v1-abstract-short" style="display: inline;"> In this work we propose MirrorCBO, a consensus-based optimization (CBO) method which generalizes standard CBO in the same way that mirror descent generalizes gradient descent. For this we apply the CBO methodology to a swarm of dual particles and retain the primal particle positions by applying the inverse of the mirror map, which we parametrize as the subdifferential of a strongly convex function… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12189v1-abstract-full').style.display = 'inline'; document.getElementById('2501.12189v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.12189v1-abstract-full" style="display: none;"> In this work we propose MirrorCBO, a consensus-based optimization (CBO) method which generalizes standard CBO in the same way that mirror descent generalizes gradient descent. For this we apply the CBO methodology to a swarm of dual particles and retain the primal particle positions by applying the inverse of the mirror map, which we parametrize as the subdifferential of a strongly convex function $蠁$. In this way, we combine the advantages of a derivative-free non-convex optimization algorithm with those of mirror descent. As a special case, the method extends CBO to optimization problems with convex constraints. Assuming bounds on the Bregman distance associated to $蠁$, we provide asymptotic convergence results for MirrorCBO with explicit exponential rate. Another key contribution is an exploratory numerical study of this new algorithm across different application settings, focusing on (i) sparsity-inducing optimization, and (ii) constrained optimization, demonstrating the competitive performance of MirrorCBO. We observe empirically that the method can also be used for optimization on (non-convex) submanifolds of Euclidean space, can be adapted to mirrored versions of other recent CBO variants, and that it inherits from mirror descent the capability to select desirable minimizers, like sparse ones. We also include an overview of recent CBO approaches for constrained optimization and compare their performance to MirrorCBO. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.12189v1-abstract-full').style.display = 'none'; document.getElementById('2501.12189v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">64 pages, 18 figures, 19 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 35B40; 35Q84; 35Q89; 35Q90; 65K10; 90C26; 90C56 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.10513">arXiv:2501.10513</a> <span> [<a href="https://arxiv.org/pdf/2501.10513">pdf</a>, <a href="https://arxiv.org/format/2501.10513">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> ConfigBot: Adaptive Resource Allocation for Robot Applications in Dynamic Environments </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dwivedula%2C+R">Rohit Dwivedula</a>, <a href="/search/cs?searchtype=author&query=Modak%2C+S">Sadanand Modak</a>, <a href="/search/cs?searchtype=author&query=Akella%2C+A">Aditya Akella</a>, <a href="/search/cs?searchtype=author&query=Biswas%2C+J">Joydeep Biswas</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Daehyeok Kim</a>, <a href="/search/cs?searchtype=author&query=Rossbach%2C+C+J">Christopher J. Rossbach</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.10513v1-abstract-short" style="display: inline;"> The growing use of autonomous mobile service robots (AMSRs) in dynamic environments requires flexible management of compute resources to optimize the performance of diverse tasks such as navigation, localization, perception, and so on. Current robot deployments, which oftentimes rely on static configurations (of the OS, applications, etc.) and system over-provisioning, fall short since they do not… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10513v1-abstract-full').style.display = 'inline'; document.getElementById('2501.10513v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.10513v1-abstract-full" style="display: none;"> The growing use of autonomous mobile service robots (AMSRs) in dynamic environments requires flexible management of compute resources to optimize the performance of diverse tasks such as navigation, localization, perception, and so on. Current robot deployments, which oftentimes rely on static configurations (of the OS, applications, etc.) and system over-provisioning, fall short since they do not account for the tasks' performance variations resulting in poor system-wide behavior such as robot instability and/or inefficient resource use. This paper presents ConfigBot, a system designed to adaptively reconfigure AMSR applications to meet a predefined performance specification by leveraging runtime profiling and automated configuration tuning. Through experiments on a Boston Dynamics Spot robot equipped with NVIDIA AGX Orin, we demonstrate ConfigBot's efficacy in maintaining system stability and optimizing resource allocation across diverse scenarios. Our findings highlight the promise of tailored and dynamic configurations for robot deployments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.10513v1-abstract-full').style.display = 'none'; document.getElementById('2501.10513v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 13 figures, 6 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.09391">arXiv:2501.09391</a> <span> [<a href="https://arxiv.org/pdf/2501.09391">pdf</a>, <a href="https://arxiv.org/format/2501.09391">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Contract-Inspired Contest Theory for Controllable Image Generation in Mobile Edge Metaverse </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.09391v1-abstract-short" style="display: inline;"> The rapid advancement of immersive technologies has propelled the development of the Metaverse, where the convergence of virtual and physical realities necessitates the generation of high-quality, photorealistic images to enhance user experience. However, generating these images, especially through Generative Diffusion Models (GDMs), in mobile edge computing environments presents significant chall… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09391v1-abstract-full').style.display = 'inline'; document.getElementById('2501.09391v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.09391v1-abstract-full" style="display: none;"> The rapid advancement of immersive technologies has propelled the development of the Metaverse, where the convergence of virtual and physical realities necessitates the generation of high-quality, photorealistic images to enhance user experience. However, generating these images, especially through Generative Diffusion Models (GDMs), in mobile edge computing environments presents significant challenges due to the limited computing resources of edge devices and the dynamic nature of wireless networks. This paper proposes a novel framework that integrates contract-inspired contest theory, Deep Reinforcement Learning (DRL), and GDMs to optimize image generation in these resource-constrained environments. The framework addresses the critical challenges of resource allocation and semantic data transmission quality by incentivizing edge devices to efficiently transmit high-quality semantic data, which is essential for creating realistic and immersive images. The use of contest and contract theory ensures that edge devices are motivated to allocate resources effectively, while DRL dynamically adjusts to network conditions, optimizing the overall image generation process. Experimental results demonstrate that the proposed approach not only improves the quality of generated images but also achieves superior convergence speed and stability compared to traditional methods. This makes the framework particularly effective for optimizing complex resource allocation tasks in mobile edge Metaverse applications, offering enhanced performance and efficiency in creating immersive virtual environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.09391v1-abstract-full').style.display = 'none'; document.getElementById('2501.09391v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages, 10figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08990">arXiv:2501.08990</a> <span> [<a href="https://arxiv.org/pdf/2501.08990">pdf</a>, <a href="https://arxiv.org/format/2501.08990">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> 3GPP Network Architecture Enhancement for Ambient IoT Service </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dongjoo Kim</a>, <a href="/search/cs?searchtype=author&query=Godin%2C+P">Philippe Godin</a>, <a href="/search/cs?searchtype=author&query=Bjerrum%2C+B">Bo Bjerrum</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+P">Pallab Gupta</a>, <a href="/search/cs?searchtype=author&query=Butt%2C+M+M">M. Majid Butt</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.08990v1-abstract-short" style="display: inline;"> Ambient internet of things (A-IoT) paradigm is under study in 3GPP with the intention to provide a sustainable solution for the IoT market without any need to replace the batteries and operate in harsh environments where it is difficult to replenish batteries. This article provides insight on 3rd Generation Partnership Project (3GPP) discussions in Release 18 and 19 with the focus on network archi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08990v1-abstract-full').style.display = 'inline'; document.getElementById('2501.08990v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08990v1-abstract-full" style="display: none;"> Ambient internet of things (A-IoT) paradigm is under study in 3GPP with the intention to provide a sustainable solution for the IoT market without any need to replace the batteries and operate in harsh environments where it is difficult to replenish batteries. This article provides insight on 3rd Generation Partnership Project (3GPP) discussions in Release 18 and 19 with the focus on network architecture aspects. 3GPP has recently decided to start normative work in its Radio Access Network (RAN) Working Group (WG) and discussions are ongoing to start a work item in other WGs with more focus on architecture aspects. We explore and analyze various aspects of system design related to architecture requirements to support A-IoT service, different architecture options to consider, security and authentication mechanisms for A-IoT devices as well as key challenges for standardization of A-IoT service. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08990v1-abstract-full').style.display = 'none'; document.getElementById('2501.08990v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08547">arXiv:2501.08547</a> <span> [<a href="https://arxiv.org/pdf/2501.08547">pdf</a>, <a href="https://arxiv.org/format/2501.08547">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> OMEGA: A Low-Latency GNN Serving System for Large Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+G">Geon-Woo Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Donghyun Kim</a>, <a href="/search/cs?searchtype=author&query=Moon%2C+J">Jeongyoon Moon</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+H">Henry Liu</a>, <a href="/search/cs?searchtype=author&query=Khan%2C+T">Tarannum Khan</a>, <a href="/search/cs?searchtype=author&query=Iyer%2C+A">Anand Iyer</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Daehyeok Kim</a>, <a href="/search/cs?searchtype=author&query=Akella%2C+A">Aditya Akella</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.08547v1-abstract-short" style="display: inline;"> Graph Neural Networks (GNNs) have been widely adopted for their ability to compute expressive node representations in graph datasets. However, serving GNNs on large graphs is challenging due to the high communication, computation, and memory overheads of constructing and executing computation graphs, which represent information flow across large neighborhoods. Existing approximation techniques in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08547v1-abstract-full').style.display = 'inline'; document.getElementById('2501.08547v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08547v1-abstract-full" style="display: none;"> Graph Neural Networks (GNNs) have been widely adopted for their ability to compute expressive node representations in graph datasets. However, serving GNNs on large graphs is challenging due to the high communication, computation, and memory overheads of constructing and executing computation graphs, which represent information flow across large neighborhoods. Existing approximation techniques in training can mitigate the overheads but, in serving, still lead to high latency and/or accuracy loss. To this end, we propose OMEGA, a system that enables low-latency GNN serving for large graphs with minimal accuracy loss through two key ideas. First, OMEGA employs selective recomputation of precomputed embeddings, which allows for reusing precomputed computation subgraphs while selectively recomputing a small fraction to minimize accuracy loss. Second, we develop computation graph parallelism, which reduces communication overhead by parallelizing the creation and execution of computation graphs across machines. Our evaluation with large graph datasets and GNN models shows that OMEGA significantly outperforms state-of-the-art techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08547v1-abstract-full').style.display = 'none'; document.getElementById('2501.08547v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.08253">arXiv:2501.08253</a> <span> [<a href="https://arxiv.org/pdf/2501.08253">pdf</a>, <a href="https://arxiv.org/format/2501.08253">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Jigsaw: Authoring Immersive Storytelling Experiences with Augmented Reality and Internet of Things </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Daekun Kim</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+Y">Youjean Cho</a>, <a href="/search/cs?searchtype=author&query=Robinson%2C+A">Ava Robinson</a>, <a href="/search/cs?searchtype=author&query=Tham%2C+Y+J">Yu Jiang Tham</a>, <a href="/search/cs?searchtype=author&query=Vaish%2C+R">Rajan Vaish</a>, <a href="/search/cs?searchtype=author&query=Monroy-Hern%C3%A1ndez%2C+A">Andr茅s Monroy-Hern谩ndez</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.08253v1-abstract-short" style="display: inline;"> Augmented Reality (AR) presents new opportunities for immersive storytelling. However, this immersiveness faces two main hurdles. First, AR's immersive quality is often confined to visual elements, such as pixels on a screen. Second, crafting immersive narratives is complex and generally beyond the reach of amateurs due to the need for advanced technical skills. We introduce Jigsaw, a system that… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08253v1-abstract-full').style.display = 'inline'; document.getElementById('2501.08253v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.08253v1-abstract-full" style="display: none;"> Augmented Reality (AR) presents new opportunities for immersive storytelling. However, this immersiveness faces two main hurdles. First, AR's immersive quality is often confined to visual elements, such as pixels on a screen. Second, crafting immersive narratives is complex and generally beyond the reach of amateurs due to the need for advanced technical skills. We introduce Jigsaw, a system that empowers beginners to both experience and craft immersive stories, blending virtual and physical elements. Jigsaw uniquely combines mobile AR with readily available Internet-of-things (IoT) devices. We conducted a qualitative study with 20 participants to assess Jigsaw's effectiveness in both consuming and creating immersive narratives. The results were promising: participants not only successfully created their own immersive stories but also found the playback of three such stories deeply engaging. However, sensory overload emerged as a significant challenge in these experiences. We discuss design trade-offs and considerations for future endeavors in immersive storytelling involving AR and IoT. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.08253v1-abstract-full').style.display = 'none'; document.getElementById('2501.08253v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">In Proceedings of the 2024 CHI Conference on Human Factors in Computing Systems (CHI '24). 14 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07730">arXiv:2501.07730</a> <span> [<a href="https://arxiv.org/pdf/2501.07730">pdf</a>, <a href="https://arxiv.org/format/2501.07730">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Democratizing Text-to-Image Masked Generative Models with Compact Text-Aware One-Dimensional Tokens </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dongwon Kim</a>, <a href="/search/cs?searchtype=author&query=He%2C+J">Ju He</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+Q">Qihang Yu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+C">Chenglin Yang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+X">Xiaohui Shen</a>, <a href="/search/cs?searchtype=author&query=Kwak%2C+S">Suha Kwak</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+L">Liang-Chieh Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07730v1-abstract-short" style="display: inline;"> Image tokenizers form the foundation of modern text-to-image generative models but are notoriously difficult to train. Furthermore, most existing text-to-image models rely on large-scale, high-quality private datasets, making them challenging to replicate. In this work, we introduce Text-Aware Transformer-based 1-Dimensional Tokenizer (TA-TiTok), an efficient and powerful image tokenizer that can… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07730v1-abstract-full').style.display = 'inline'; document.getElementById('2501.07730v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.07730v1-abstract-full" style="display: none;"> Image tokenizers form the foundation of modern text-to-image generative models but are notoriously difficult to train. Furthermore, most existing text-to-image models rely on large-scale, high-quality private datasets, making them challenging to replicate. In this work, we introduce Text-Aware Transformer-based 1-Dimensional Tokenizer (TA-TiTok), an efficient and powerful image tokenizer that can utilize either discrete or continuous 1-dimensional tokens. TA-TiTok uniquely integrates textual information during the tokenizer decoding stage (i.e., de-tokenization), accelerating convergence and enhancing performance. TA-TiTok also benefits from a simplified, yet effective, one-stage training process, eliminating the need for the complex two-stage distillation used in previous 1-dimensional tokenizers. This design allows for seamless scalability to large datasets. Building on this, we introduce a family of text-to-image Masked Generative Models (MaskGen), trained exclusively on open data while achieving comparable performance to models trained on private data. We aim to release both the efficient, strong TA-TiTok tokenizers and the open-data, open-weight MaskGen models to promote broader access and democratize the field of text-to-image masked generative models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07730v1-abstract-full').style.display = 'none'; document.getElementById('2501.07730v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page at https://tacju.github.io/projects/maskgen.html</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07582">arXiv:2501.07582</a> <span> [<a href="https://arxiv.org/pdf/2501.07582">pdf</a>, <a href="https://arxiv.org/format/2501.07582">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3658139">10.1145/3658139 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Spin-Weighted Spherical Harmonics for Polarized Light Transport </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yi%2C+S">Shinyoung Yi</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Donggun Kim</a>, <a href="/search/cs?searchtype=author&query=Na%2C+J">Jiwoong Na</a>, <a href="/search/cs?searchtype=author&query=Tong%2C+X">Xin Tong</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+M+H">Min H. Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07582v1-abstract-short" style="display: inline;"> The objective of polarization rendering is to simulate the interaction of light with materials exhibiting polarization-dependent behavior. However, integrating polarization into rendering is challenging and increases computational costs significantly. The primary difficulty lies in efficiently modeling and computing the complex reflection phenomena associated with polarized light. Specifically, fr… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07582v1-abstract-full').style.display = 'inline'; document.getElementById('2501.07582v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.07582v1-abstract-full" style="display: none;"> The objective of polarization rendering is to simulate the interaction of light with materials exhibiting polarization-dependent behavior. However, integrating polarization into rendering is challenging and increases computational costs significantly. The primary difficulty lies in efficiently modeling and computing the complex reflection phenomena associated with polarized light. Specifically, frequency-domain analysis, essential for efficient environment lighting and storage of complex light interactions, is lacking. To efficiently simulate and reproduce polarized light interactions using frequency-domain techniques, we address the challenge of maintaining continuity in polarized light transport represented by Stokes vectors within angular domains. The conventional spherical harmonics method cannot effectively handle continuity and rotation invariance for Stokes vectors. To overcome this, we develop a new method called polarized spherical harmonics (PSH) based on the spin-weighted spherical harmonics theory. Our method provides a rotation-invariant representation of Stokes vector fields. Furthermore, we introduce frequency domain formulations of polarized rendering equations and spherical convolution based on PSH. We first define spherical convolution on Stokes vector fields in the angular domain, and it also provides efficient computation of polarized light transport, nearly on an entry-wise product in the frequency domain. Our frequency domain formulation, including spherical convolution, led to the development of the first real-time polarization rendering technique under polarized environmental illumination, named precomputed polarized radiance transfer, using our polarized spherical harmonics. Results demonstrate that our method can effectively and accurately simulate and reproduce polarized light interactions in complex reflection phenomena. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07582v1-abstract-full').style.display = 'none'; document.getElementById('2501.07582v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.3.7 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ACM Transactions on Graphics 43, 4, Article 127 (July 2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07161">arXiv:2501.07161</a> <span> [<a href="https://arxiv.org/pdf/2501.07161">pdf</a>, <a href="https://arxiv.org/format/2501.07161">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> QuantuneV2: Compiler-Based Local Metric-Driven Mixed Precision Quantization for Practical Embedded AI Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jeongseok Kim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Jemin Lee</a>, <a href="/search/cs?searchtype=author&query=Kwon%2C+Y">Yongin Kwon</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Daeyoung Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.07161v1-abstract-short" style="display: inline;"> Mixed-precision quantization methods have been proposed to reduce model size while minimizing accuracy degradation. However, existing studies require retraining and do not consider the computational overhead and intermediate representations (IR) generated during the compilation process, limiting their application at the compiler level. This computational overhead refers to the runtime latency caus… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07161v1-abstract-full').style.display = 'inline'; document.getElementById('2501.07161v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.07161v1-abstract-full" style="display: none;"> Mixed-precision quantization methods have been proposed to reduce model size while minimizing accuracy degradation. However, existing studies require retraining and do not consider the computational overhead and intermediate representations (IR) generated during the compilation process, limiting their application at the compiler level. This computational overhead refers to the runtime latency caused by frequent quantization and dequantization operations during inference. Performing these operations at the individual operator level causes significant runtime delays. To address these issues, we propose QuantuneV2, a compiler-based mixed-precision quantization method designed for practical embedded AI applications. QuantuneV2 performs inference only twice, once before quantization and once after quantization, and operates with a computational complexity of O(n) that increases linearly with the number of model parameters. We also made the sensitivity analysis more stable by using local metrics like weights, activation values, the Signal to Quantization Noise Ratio, and the Mean Squared Error. We also cut down on computational overhead by choosing the best IR and using operator fusion. Experimental results show that QuantuneV2 achieved up to a 10.28 percent improvement in accuracy and a 12.52 percent increase in speed compared to existing methods across five models: ResNet18v1, ResNet50v1, SqueezeNetv1, VGGNet, and MobileNetv2. This demonstrates that QuantuneV2 enhances model performance while maintaining computational efficiency, making it suitable for deployment in embedded AI environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.07161v1-abstract-full').style.display = 'none'; document.getElementById('2501.07161v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 10 figures, Accepted in Future Generation Computer Systems Journal</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06780">arXiv:2501.06780</a> <span> [<a href="https://arxiv.org/pdf/2501.06780">pdf</a>, <a href="https://arxiv.org/format/2501.06780">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Emerging Technologies">cs.ET</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> COMPASS: A Compiler Framework for Resource-Constrained Crossbar-Array Based In-Memory Deep Learning Accelerators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Park%2C+J">Jihoon Park</a>, <a href="/search/cs?searchtype=author&query=Choe%2C+J">Jeongin Choe</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dohyun Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J">Jae-Joon Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06780v1-abstract-short" style="display: inline;"> Recently, crossbar array based in-memory accelerators have been gaining interest due to their high throughput and energy efficiency. While software and compiler support for the in-memory accelerators has also been introduced, they are currently limited to the case where all weights are assumed to be on-chip. This limitation becomes apparent with the significantly increasing network sizes compared… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06780v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06780v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06780v1-abstract-full" style="display: none;"> Recently, crossbar array based in-memory accelerators have been gaining interest due to their high throughput and energy efficiency. While software and compiler support for the in-memory accelerators has also been introduced, they are currently limited to the case where all weights are assumed to be on-chip. This limitation becomes apparent with the significantly increasing network sizes compared to the in-memory footprint. Weight replacement schemes are essential to address this issue. We propose COMPASS, a compiler framework for resource-constrained crossbar-based processing-in-memory (PIM) deep neural network (DNN) accelerators. COMPASS is specially targeted for networks that exceed the capacity of PIM crossbar arrays, necessitating access to external memories. We propose an algorithm to determine the optimal partitioning that divides the layers so that each partition can be accelerated on chip. Our scheme takes into account the data dependence between layers, core utilization, and the number of write instructions to minimize latency, memory accesses, and improve energy efficiency. Simulation results demonstrate that COMPASS can accommodate much more networks using a minimal memory footprint, while improving throughput by 1.78X and providing 1.28X savings in energy-delay product (EDP) over baseline partitioning methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06780v1-abstract-full').style.display = 'none'; document.getElementById('2501.06780v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted IEEE DATE 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.06293">arXiv:2501.06293</a> <span> [<a href="https://arxiv.org/pdf/2501.06293">pdf</a>, <a href="https://arxiv.org/format/2501.06293">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Instrumentation and Methods for Astrophysics">astro-ph.IM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Earth and Planetary Astrophysics">astro-ph.EP</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Astrophysics of Galaxies">astro-ph.GA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LensNet: Enhancing Real-time Microlensing Event Discovery with Recurrent Neural Networks in the Korea Microlensing Telescope Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Via%C3%B1a%2C+J">Javier Via帽a</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+K">Kyu-Ha Hwang</a>, <a href="/search/cs?searchtype=author&query=de+Beurs%2C+Z">Zo毛 de Beurs</a>, <a href="/search/cs?searchtype=author&query=Yee%2C+J+C">Jennifer C. Yee</a>, <a href="/search/cs?searchtype=author&query=Vanderburg%2C+A">Andrew Vanderburg</a>, <a href="/search/cs?searchtype=author&query=Albrow%2C+M+D">Michael D. Albrow</a>, <a href="/search/cs?searchtype=author&query=Chung%2C+S">Sun-Ju Chung</a>, <a href="/search/cs?searchtype=author&query=Gould%2C+A">Andrew Gould</a>, <a href="/search/cs?searchtype=author&query=Han%2C+C">Cheongho Han</a>, <a href="/search/cs?searchtype=author&query=Jung%2C+Y+K">Youn Kil Jung</a>, <a href="/search/cs?searchtype=author&query=Ryu%2C+Y">Yoon-Hyun Ryu</a>, <a href="/search/cs?searchtype=author&query=Shin%2C+I">In-Gu Shin</a>, <a href="/search/cs?searchtype=author&query=Shvartzvald%2C+Y">Yossi Shvartzvald</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+H">Hongjing Yang</a>, <a href="/search/cs?searchtype=author&query=Zang%2C+W">Weicheng Zang</a>, <a href="/search/cs?searchtype=author&query=Cha%2C+S">Sang-Mok Cha</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dong-Jin Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+S">Seung-Lee Kim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+C">Chung-Uk Lee</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+D">Dong-Joo Lee</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+Y">Yongseok Lee</a>, <a href="/search/cs?searchtype=author&query=Park%2C+B">Byeong-Gon Park</a>, <a href="/search/cs?searchtype=author&query=Pogge%2C+R+W">Richard W. Pogge</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.06293v1-abstract-short" style="display: inline;"> Traditional microlensing event vetting methods require highly trained human experts, and the process is both complex and time-consuming. This reliance on manual inspection often leads to inefficiencies and constrains the ability to scale for widespread exoplanet detection, ultimately hindering discovery rates. To address the limits of traditional microlensing event vetting, we have developed LensN… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06293v1-abstract-full').style.display = 'inline'; document.getElementById('2501.06293v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.06293v1-abstract-full" style="display: none;"> Traditional microlensing event vetting methods require highly trained human experts, and the process is both complex and time-consuming. This reliance on manual inspection often leads to inefficiencies and constrains the ability to scale for widespread exoplanet detection, ultimately hindering discovery rates. To address the limits of traditional microlensing event vetting, we have developed LensNet, a machine learning pipeline specifically designed to distinguish legitimate microlensing events from false positives caused by instrumental artifacts, such as pixel bleed trails and diffraction spikes. Our system operates in conjunction with a preliminary algorithm that detects increasing trends in flux. These flagged instances are then passed to LensNet for further classification, allowing for timely alerts and follow-up observations. Tailored for the multi-observatory setup of the Korea Microlensing Telescope Network (KMTNet) and trained on a rich dataset of manually classified events, LensNet is optimized for early detection and warning of microlensing occurrences, enabling astronomers to organize follow-up observations promptly. The internal model of the pipeline employs a multi-branch Recurrent Neural Network (RNN) architecture that evaluates time-series flux data with contextual information, including sky background, the full width at half maximum of the target star, flux errors, PSF quality flags, and air mass for each observation. We demonstrate a classification accuracy above 87.5%, and anticipate further improvements as we expand our training set and continue to refine the algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.06293v1-abstract-full').style.display = 'none'; document.getElementById('2501.06293v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">23 pages, 13 figures, Accepted for publication in the The Astronomical Journal</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 85-08 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> J.2 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02952">arXiv:2501.02952</a> <span> [<a href="https://arxiv.org/pdf/2501.02952">pdf</a>, <a href="https://arxiv.org/format/2501.02952">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Online Collaborative Resource Allocation and Task Offloading for Multi-access Edge Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+G">Geng Sun</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+M">Minghua Yuan</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+Z">Zemin Sun</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Han%2C+Z">Zhu Han</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02952v1-abstract-short" style="display: inline;"> Multi-access edge computing (MEC) is emerging as a promising paradigm to provide flexible computing services close to user devices (UDs). However, meeting the computation-hungry and delay-sensitive demands of UDs faces several challenges, including the resource constraints of MEC servers, inherent dynamic and complex features in the MEC system, and difficulty in dealing with the time-coupled and d… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02952v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02952v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02952v1-abstract-full" style="display: none;"> Multi-access edge computing (MEC) is emerging as a promising paradigm to provide flexible computing services close to user devices (UDs). However, meeting the computation-hungry and delay-sensitive demands of UDs faces several challenges, including the resource constraints of MEC servers, inherent dynamic and complex features in the MEC system, and difficulty in dealing with the time-coupled and decision-coupled optimization. In this work, we first present an edge-cloud collaborative MEC architecture, where the MEC servers and cloud collaboratively provide offloading services for UDs. Moreover, we formulate an energy-efficient and delay-aware optimization problem (EEDAOP) to minimize the energy consumption of UDs under the constraints of task deadlines and long-term queuing delays. Since the problem is proved to be non-convex mixed integer nonlinear programming (MINLP), we propose an online joint communication resource allocation and task offloading approach (OJCTA). Specifically, we transform EEDAOP into a real-time optimization problem by employing the Lyapunov optimization framework. Then, to solve the real-time optimization problem, we propose a communication resource allocation and task offloading optimization method by employing the Tammer decomposition mechanism, convex optimization method, bilateral matching mechanism, and dependent rounding method. Simulation results demonstrate that the proposed OJCTA can achieve superior system performance compared to the benchmark approaches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02952v1-abstract-full').style.display = 'none'; document.getElementById('2501.02952v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02787">arXiv:2501.02787</a> <span> [<a href="https://arxiv.org/pdf/2501.02787">pdf</a>, <a href="https://arxiv.org/format/2501.02787">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Joint Optimization of UAV-Carried IRS for Urban Low Altitude mmWave Communications with Deep Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Xie%2C+W">Wenwen Xie</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+G">Geng Sun</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+B">Bei Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiahui Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02787v1-abstract-short" style="display: inline;"> Emerging technologies in sixth generation (6G) of wireless communications, such as terahertz communication and ultra-massive multiple-input multiple-output, present promising prospects. Despite the high data rate potential of millimeter wave communications, millimeter wave (mmWave) communications in urban low altitude economy (LAE) environments are constrained by challenges such as signal attenuat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02787v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02787v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02787v1-abstract-full" style="display: none;"> Emerging technologies in sixth generation (6G) of wireless communications, such as terahertz communication and ultra-massive multiple-input multiple-output, present promising prospects. Despite the high data rate potential of millimeter wave communications, millimeter wave (mmWave) communications in urban low altitude economy (LAE) environments are constrained by challenges such as signal attenuation and multipath interference. Specially, in urban environments, mmWave communication experiences significant attenuation due to buildings, owing to its short wavelength, which necessitates developing innovative approaches to improve the robustness of such communications in LAE networking. In this paper, we explore the use of an unmanned aerial vehicle (UAV)-carried intelligent reflecting surface (IRS) to support low altitude mmWave communication. Specifically, we consider a typical urban low altitude communication scenario where a UAV-carried IRS establishes a line-of-sight (LoS) channel between the mobile users and a source user (SU) despite the presence of obstacles. Subsequently, we formulate an optimization problem aimed at maximizing the transmission rates and minimizing the energy consumption of the UAV by jointly optimizing phase shifts of the IRS and UAV trajectory. Given the non-convex nature of the problem and its high dynamics, we propose a deep reinforcement learning-based approach incorporating neural episodic control, long short-term memory, and an IRS phase shift control method to enhance the stability and accelerate the convergence. Simulation results show that the proposed algorithm effectively resolves the problem and surpasses other benchmark algorithms in various performances. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02787v1-abstract-full').style.display = 'none'; document.getElementById('2501.02787v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02570">arXiv:2501.02570</a> <span> [<a href="https://arxiv.org/pdf/2501.02570">pdf</a>, <a href="https://arxiv.org/format/2501.02570">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Decoding fMRI Data into Captions using Prefix Language Modeling </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shen%2C+V">Vyacheslav Shen</a>, <a href="/search/cs?searchtype=author&query=Kunanbayev%2C+K">Kassymzhomart Kunanbayev</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dae-Shik Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02570v1-abstract-short" style="display: inline;"> With the advancements in Large Language and Latent Diffusion models, brain decoding has achieved remarkable results in recent years. The works on the NSD dataset, with stimuli images from the COCO dataset, leverage the embeddings from the CLIP model for image reconstruction and GIT for captioning. However, the current captioning approach introduces the challenge of potential data contamination giv… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02570v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02570v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02570v1-abstract-full" style="display: none;"> With the advancements in Large Language and Latent Diffusion models, brain decoding has achieved remarkable results in recent years. The works on the NSD dataset, with stimuli images from the COCO dataset, leverage the embeddings from the CLIP model for image reconstruction and GIT for captioning. However, the current captioning approach introduces the challenge of potential data contamination given that the GIT model was trained on the COCO dataset. In this work, we present an alternative method for decoding brain signals into image captions by predicting a DINOv2 model's embedding of an image from the corresponding fMRI signal and then providing its [CLS] token as the prefix to the GPT-2 language model which decreases computational requirements considerably. Additionally, instead of commonly used Linear Regression, we explore 3D Convolutional Neural Network mapping of fMRI signals to image embedding space for better accounting positional information of voxels. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02570v1-abstract-full').style.display = 'none'; document.getElementById('2501.02570v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">4 pages, 2 tables, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02504">arXiv:2501.02504</a> <span> [<a href="https://arxiv.org/pdf/2501.02504">pdf</a>, <a href="https://arxiv.org/format/2501.02504">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Watch Video, Catch Keyword: Context-aware Keyword Attention for Moment Retrieval and Highlight Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Um%2C+S+J">Sung Jin Um</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dongjin Kim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+S">Sangmin Lee</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+J+U">Jung Uk Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02504v1-abstract-short" style="display: inline;"> The goal of video moment retrieval and highlight detection is to identify specific segments and highlights based on a given text query. With the rapid growth of video content and the overlap between these tasks, recent works have addressed both simultaneously. However, they still struggle to fully capture the overall video context, making it challenging to determine which words are most relevant.… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02504v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02504v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02504v1-abstract-full" style="display: none;"> The goal of video moment retrieval and highlight detection is to identify specific segments and highlights based on a given text query. With the rapid growth of video content and the overlap between these tasks, recent works have addressed both simultaneously. However, they still struggle to fully capture the overall video context, making it challenging to determine which words are most relevant. In this paper, we present a novel Video Context-aware Keyword Attention module that overcomes this limitation by capturing keyword variation within the context of the entire video. To achieve this, we introduce a video context clustering module that provides concise representations of the overall video context, thereby enhancing the understanding of keyword dynamics. Furthermore, we propose a keyword weight detection module with keyword-aware contrastive learning that incorporates keyword information to enhance fine-grained alignment between visual and textual features. Extensive experiments on the QVHighlights, TVSum, and Charades-STA benchmarks demonstrate that our proposed method significantly improves performance in moment retrieval and highlight detection tasks compared to existing approaches. Our code is available at: https://github.com/VisualAIKHU/Keyword-DETR <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02504v1-abstract-full').style.display = 'none'; document.getElementById('2501.02504v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02259">arXiv:2501.02259</a> <span> [<a href="https://arxiv.org/pdf/2501.02259">pdf</a>, <a href="https://arxiv.org/ps/2501.02259">ps</a>, <a href="https://arxiv.org/format/2501.02259">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Metric Geometry">math.MG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="General Topology">math.GN</span> </div> </div> <p class="title is-5 mathjax"> Expensive Homeomorphism of Convex Bodies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+D">Donghan Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02259v1-abstract-short" style="display: inline;"> In this paper, we address the longstanding question of whether expansive homeomorphisms can exist within convex bodies in Euclidean spaces. Utilizing fundamental tools from topology, including the Borsuk-Ulam theorem and Brouwer's fixed-point theorem, we establish the nonexistence of such mappings. Through an inductive approach based on dimension and the extension of boundary homeomorphisms, we de… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02259v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02259v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02259v1-abstract-full" style="display: none;"> In this paper, we address the longstanding question of whether expansive homeomorphisms can exist within convex bodies in Euclidean spaces. Utilizing fundamental tools from topology, including the Borsuk-Ulam theorem and Brouwer's fixed-point theorem, we establish the nonexistence of such mappings. Through an inductive approach based on dimension and the extension of boundary homeomorphisms, we demonstrate that expansive homeomorphisms are incompatible with the compact and convex structure of these bodies. This work highlights the interplay between topological principles and metric geometry, offering new insights into the constraints imposed by convexity. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02259v1-abstract-full').style.display = 'none'; document.getElementById('2501.02259v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02159">arXiv:2501.02159</a> <span> [<a href="https://arxiv.org/pdf/2501.02159">pdf</a>, <a href="https://arxiv.org/ps/2501.02159">ps</a>, <a href="https://arxiv.org/format/2501.02159">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Metric Geometry">math.MG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Geometric Topology">math.GT</span> </div> </div> <p class="title is-5 mathjax"> Equichordal Points of Convex Bodies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jang%2C+L">Leo Jang</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Donghan Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.02159v1-abstract-short" style="display: inline;"> The equichordal point problem is a classical question in geometry, asking whether there exist multiple equichordal points within a single convex body. An equichordal point is defined as a point through which all chords of the convex body have the same length. This problem, initially posed by Fujiwara and further investigated by Blaschke, Rothe, and Weitzenb枚ck, has remained an intriguing challenge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02159v1-abstract-full').style.display = 'inline'; document.getElementById('2501.02159v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.02159v1-abstract-full" style="display: none;"> The equichordal point problem is a classical question in geometry, asking whether there exist multiple equichordal points within a single convex body. An equichordal point is defined as a point through which all chords of the convex body have the same length. This problem, initially posed by Fujiwara and further investigated by Blaschke, Rothe, and Weitzenb枚ck, has remained an intriguing challenge, particularly in higher dimensions. In this paper, we rigorously prove the nonexistence of multiple equichordal points in $n$-dimensional convex bodies for $n \geq 2$. By utilizing topological tools such as the Borsuk-Ulam theorem and analyzing the properties of continuous functions and mappings on convex bodies, we resolve this long-standing question. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.02159v1-abstract-full').style.display = 'none'; document.getElementById('2501.02159v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01980">arXiv:2501.01980</a> <span> [<a href="https://arxiv.org/pdf/2501.01980">pdf</a>, <a href="https://arxiv.org/format/2501.01980">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3687767">10.1145/3687767 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Polarimetric BSSRDF Acquisition of Dynamic Faces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ha%2C+H">Hyunho Ha</a>, <a href="/search/cs?searchtype=author&query=Hwang%2C+I">Inseung Hwang</a>, <a href="/search/cs?searchtype=author&query=Monzon%2C+N">Nestor Monzon</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+J">Jaemin Cho</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Donggun Kim</a>, <a href="/search/cs?searchtype=author&query=Baek%2C+S">Seung-Hwan Baek</a>, <a href="/search/cs?searchtype=author&query=Mu%C3%B1oz%2C+A">Adolfo Mu帽oz</a>, <a href="/search/cs?searchtype=author&query=Gutierrez%2C+D">Diego Gutierrez</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+M+H">Min H. Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01980v1-abstract-short" style="display: inline;"> Acquisition and modeling of polarized light reflection and scattering help reveal the shape, structure, and physical characteristics of an object, which is increasingly important in computer graphics. However, current polarimetric acquisition systems are limited to static and opaque objects. Human faces, on the other hand, present a particularly difficult challenge, given their complex structure a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01980v1-abstract-full').style.display = 'inline'; document.getElementById('2501.01980v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01980v1-abstract-full" style="display: none;"> Acquisition and modeling of polarized light reflection and scattering help reveal the shape, structure, and physical characteristics of an object, which is increasingly important in computer graphics. However, current polarimetric acquisition systems are limited to static and opaque objects. Human faces, on the other hand, present a particularly difficult challenge, given their complex structure and reflectance properties, the strong presence of spatially-varying subsurface scattering, and their dynamic nature. We present a new polarimetric acquisition method for dynamic human faces, which focuses on capturing spatially varying appearance and precise geometry, across a wide spectrum of skin tones and facial expressions. It includes both single and heterogeneous subsurface scattering, index of refraction, and specular roughness and intensity, among other parameters, while revealing biophysically-based components such as inner- and outer-layer hemoglobin, eumelanin and pheomelanin. Our method leverages such components' unique multispectral absorption profiles to quantify their concentrations, which in turn inform our model about the complex interactions occurring within the skin layers. To our knowledge, our work is the first to simultaneously acquire polarimetric and spectral reflectance information alongside biophysically-based skin parameters and geometry of dynamic human faces. Moreover, our polarimetric skin model integrates seamlessly into various rendering pipelines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01980v1-abstract-full').style.display = 'none'; document.getElementById('2501.01980v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.3.7 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ACM Transactions on Graphics 43, 6, Article 275 (December 2024) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01372">arXiv:2501.01372</a> <span> [<a href="https://arxiv.org/pdf/2501.01372">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ScarNet: A Novel Foundation Model for Automated Myocardial Scar Quantification from LGE in Cardiac MRI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tavakoli%2C+N">Neda Tavakoli</a>, <a href="/search/cs?searchtype=author&query=Rahsepar%2C+A+A">Amir Ali Rahsepar</a>, <a href="/search/cs?searchtype=author&query=Benefield%2C+B+C">Brandon C. Benefield</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+D">Daming Shen</a>, <a href="/search/cs?searchtype=author&query=L%C3%B3pez-Tapia%2C+S">Santiago L贸pez-Tapia</a>, <a href="/search/cs?searchtype=author&query=Schiffers%2C+F">Florian Schiffers</a>, <a href="/search/cs?searchtype=author&query=Goldberger%2C+J+J">Jeffrey J. Goldberger</a>, <a href="/search/cs?searchtype=author&query=Albert%2C+C+M">Christine M. Albert</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+E">Edwin Wu</a>, <a href="/search/cs?searchtype=author&query=Katsaggelos%2C+A+K">Aggelos K. Katsaggelos</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+D+C">Daniel C. Lee</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Daniel Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01372v1-abstract-short" style="display: inline;"> Background: Late Gadolinium Enhancement (LGE) imaging is the gold standard for assessing myocardial fibrosis and scarring, with left ventricular (LV) LGE extent predicting major adverse cardiac events (MACE). Despite its importance, routine LGE-based LV scar quantification is hindered by labor-intensive manual segmentation and inter-observer variability. Methods: We propose ScarNet, a hybrid model… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01372v1-abstract-full').style.display = 'inline'; document.getElementById('2501.01372v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01372v1-abstract-full" style="display: none;"> Background: Late Gadolinium Enhancement (LGE) imaging is the gold standard for assessing myocardial fibrosis and scarring, with left ventricular (LV) LGE extent predicting major adverse cardiac events (MACE). Despite its importance, routine LGE-based LV scar quantification is hindered by labor-intensive manual segmentation and inter-observer variability. Methods: We propose ScarNet, a hybrid model combining a transformer-based encoder from the Medical Segment Anything Model (MedSAM) with a convolution-based U-Net decoder, enhanced by tailored attention blocks. ScarNet was trained on 552 ischemic cardiomyopathy patients with expert segmentations of myocardial and scar boundaries and tested on 184 separate patients. Results: ScarNet achieved robust scar segmentation in 184 test patients, yielding a median Dice score of 0.912 (IQR: 0.863--0.944), significantly outperforming MedSAM (median Dice = 0.046, IQR: 0.043--0.047) and nnU-Net (median Dice = 0.638, IQR: 0.604--0.661). ScarNet demonstrated lower bias (-0.63%) and coefficient of variation (4.3%) compared to MedSAM (bias: -13.31%, CoV: 130.3%) and nnU-Net (bias: -2.46%, CoV: 20.3%). In Monte Carlo simulations with noise perturbations, ScarNet achieved significantly higher scar Dice (0.892 \pm 0.053, CoV = 5.9%) than MedSAM (0.048 \pm 0.112, CoV = 233.3%) and nnU-Net (0.615 \pm 0.537, CoV = 28.7%). Conclusion: ScarNet outperformed MedSAM and nnU-Net in accurately segmenting myocardial and scar boundaries in LGE images. The model exhibited robust performance across diverse image qualities and scar patterns. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01372v1-abstract-full').style.display = 'none'; document.getElementById('2501.01372v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">31 pages, 8 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01197">arXiv:2501.01197</a> <span> [<a href="https://arxiv.org/pdf/2501.01197">pdf</a>, <a href="https://arxiv.org/format/2501.01197">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> LayeringDiff: Layered Image Synthesis via Generation, then Disassembly with Generative Knowledge </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kang%2C+K">Kyoungkook Kang</a>, <a href="/search/cs?searchtype=author&query=Sim%2C+G">Gyujin Sim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+G">Geonung Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Donguk Kim</a>, <a href="/search/cs?searchtype=author&query=Nam%2C+S">Seungho Nam</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+S">Sunghyun Cho</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01197v1-abstract-short" style="display: inline;"> Layers have become indispensable tools for professional artists, allowing them to build a hierarchical structure that enables independent control over individual visual elements. In this paper, we propose LayeringDiff, a novel pipeline for the synthesis of layered images, which begins by generating a composite image using an off-the-shelf image generative model, followed by disassembling the image… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01197v1-abstract-full').style.display = 'inline'; document.getElementById('2501.01197v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01197v1-abstract-full" style="display: none;"> Layers have become indispensable tools for professional artists, allowing them to build a hierarchical structure that enables independent control over individual visual elements. In this paper, we propose LayeringDiff, a novel pipeline for the synthesis of layered images, which begins by generating a composite image using an off-the-shelf image generative model, followed by disassembling the image into its constituent foreground and background layers. By extracting layers from a composite image, rather than generating them from scratch, LayeringDiff bypasses the need for large-scale training to develop generative capabilities for individual layers. Furthermore, by utilizing a pretrained off-the-shelf generative model, our method can produce diverse contents and object scales in synthesized layers. For effective layer decomposition, we adapt a large-scale pretrained generative prior to estimate foreground and background layers. We also propose high-frequency alignment modules to refine the fine-details of the estimated layers. Our comprehensive experiments demonstrate that our approach effectively synthesizes layered images and supports various practical applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01197v1-abstract-full').style.display = 'none'; document.getElementById('2501.01197v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01141">arXiv:2501.01141</a> <span> [<a href="https://arxiv.org/pdf/2501.01141">pdf</a>, <a href="https://arxiv.org/ps/2501.01141">ps</a>, <a href="https://arxiv.org/format/2501.01141">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> Embodied AI-Enhanced Vehicular Networks: An Integrated Large Language Models and Reinforcement Learning Method </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruichen Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+C">Changyuan Zhao</a>, <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongyang Du</a>, <a href="/search/cs?searchtype=author&query=Niyato%2C+D">Dusit Niyato</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiacheng Wang</a>, <a href="/search/cs?searchtype=author&query=Sawadsitang%2C+S">Suttinee Sawadsitang</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+X">Xuemin Shen</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+I">Dong In Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.01141v1-abstract-short" style="display: inline;"> This paper investigates adaptive transmission strategies in embodied AI-enhanced vehicular networks by integrating large language models (LLMs) for semantic information extraction and deep reinforcement learning (DRL) for decision-making. The proposed framework aims to optimize both data transmission efficiency and decision accuracy by formulating an optimization problem that incorporates the Webe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01141v1-abstract-full').style.display = 'inline'; document.getElementById('2501.01141v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.01141v1-abstract-full" style="display: none;"> This paper investigates adaptive transmission strategies in embodied AI-enhanced vehicular networks by integrating large language models (LLMs) for semantic information extraction and deep reinforcement learning (DRL) for decision-making. The proposed framework aims to optimize both data transmission efficiency and decision accuracy by formulating an optimization problem that incorporates the Weber-Fechner law, serving as a metric for balancing bandwidth utilization and quality of experience (QoE). Specifically, we employ the large language and vision assistant (LLAVA) model to extract critical semantic information from raw image data captured by embodied AI agents (i.e., vehicles), reducing transmission data size by approximately more than 90\% while retaining essential content for vehicular communication and decision-making. In the dynamic vehicular environment, we employ a generalized advantage estimation-based proximal policy optimization (GAE-PPO) method to stabilize decision-making under uncertainty. Simulation results show that attention maps from LLAVA highlight the model's focus on relevant image regions, enhancing semantic representation accuracy. Additionally, our proposed transmission strategy improves QoE by up to 36\% compared to DDPG and accelerates convergence by reducing required steps by up to 47\% compared to pure PPO. Further analysis indicates that adapting semantic symbol length provides an effective trade-off between transmission quality and bandwidth, achieving up to a 61.4\% improvement in QoE when scaling from 4 to 8 vehicles. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01141v1-abstract-full').style.display = 'none'; document.getElementById('2501.01141v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 10 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.00511">arXiv:2501.00511</a> <span> [<a href="https://arxiv.org/pdf/2501.00511">pdf</a>, <a href="https://arxiv.org/format/2501.00511">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> </div> </div> <p class="title is-5 mathjax"> Stochastic Extragradient with Flip-Flop Shuffling & Anchoring: Provable Improvements </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chae%2C+J">Jiseok Chae</a>, <a href="/search/cs?searchtype=author&query=Yun%2C+C">Chulhee Yun</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Donghwan Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.00511v1-abstract-short" style="display: inline;"> In minimax optimization, the extragradient (EG) method has been extensively studied because it outperforms the gradient descent-ascent method in convex-concave (C-C) problems. Yet, stochastic EG (SEG) has seen limited success in C-C problems, especially for unconstrained cases. Motivated by the recent progress of shuffling-based stochastic methods, we investigate the convergence of shuffling-based… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00511v1-abstract-full').style.display = 'inline'; document.getElementById('2501.00511v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.00511v1-abstract-full" style="display: none;"> In minimax optimization, the extragradient (EG) method has been extensively studied because it outperforms the gradient descent-ascent method in convex-concave (C-C) problems. Yet, stochastic EG (SEG) has seen limited success in C-C problems, especially for unconstrained cases. Motivated by the recent progress of shuffling-based stochastic methods, we investigate the convergence of shuffling-based SEG in unconstrained finite-sum minimax problems, in search of convergent shuffling-based SEG. Our analysis reveals that both random reshuffling and the recently proposed flip-flop shuffling alone can suffer divergence in C-C problems. However, with an additional simple trick called anchoring, we develop the SEG with flip-flop anchoring (SEG-FFA) method which successfully converges in C-C problems. We also show upper and lower bounds in the strongly-convex-strongly-concave setting, demonstrating that SEG-FFA has a provably faster convergence rate compared to other shuffling-based methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00511v1-abstract-full').style.display = 'none'; document.getElementById('2501.00511v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">73+7 pages, 4 figures. Published in NeurIPS 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.00318">arXiv:2501.00318</a> <span> [<a href="https://arxiv.org/pdf/2501.00318">pdf</a>, <a href="https://arxiv.org/format/2501.00318">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Improving Text-based Person Search via Part-level Cross-modal Correspondence </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Park%2C+J">Jicheol Park</a>, <a href="/search/cs?searchtype=author&query=Jeong%2C+B">Boseung Jeong</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dongwon Kim</a>, <a href="/search/cs?searchtype=author&query=Kwak%2C+S">Suha Kwak</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.00318v1-abstract-short" style="display: inline;"> Text-based person search is the task of finding person images that are the most relevant to the natural language text description given as query. The main challenge of this task is a large gap between the target images and text queries, which makes it difficult to establish correspondence and distinguish subtle differences across people. To address this challenge, we introduce an efficient encoder… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00318v1-abstract-full').style.display = 'inline'; document.getElementById('2501.00318v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.00318v1-abstract-full" style="display: none;"> Text-based person search is the task of finding person images that are the most relevant to the natural language text description given as query. The main challenge of this task is a large gap between the target images and text queries, which makes it difficult to establish correspondence and distinguish subtle differences across people. To address this challenge, we introduce an efficient encoder-decoder model that extracts coarse-to-fine embedding vectors which are semantically aligned across the two modalities without supervision for the alignment. There is another challenge of learning to capture fine-grained information with only person IDs as supervision, where similar body parts of different individuals are considered different due to the lack of part-level supervision. To tackle this, we propose a novel ranking loss, dubbed commonality-based margin ranking loss, which quantifies the degree of commonality of each body part and reflects it during the learning of fine-grained body part details. As a consequence, it enables our method to achieve the best records on three public benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.00318v1-abstract-full').style.display = 'none'; document.getElementById('2501.00318v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.19289">arXiv:2412.19289</a> <span> [<a href="https://arxiv.org/pdf/2412.19289">pdf</a>, <a href="https://arxiv.org/format/2412.19289">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> ViPCap: Retrieval Text-Based Visual Prompts for Lightweight Image Captioning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+T">Taewhan Kim</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+S">Soeun Lee</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+S">Si-Woo Kim</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dong-Jin Kim</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.19289v3-abstract-short" style="display: inline;"> Recent lightweight image captioning models using retrieved data mainly focus on text prompts. However, previous works only utilize the retrieved text as text prompts, and the visual information relies only on the CLIP visual embedding. Because of this issue, there is a limitation that the image descriptions inherent in the prompt are not sufficiently reflected in the visual embedding space. To tac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19289v3-abstract-full').style.display = 'inline'; document.getElementById('2412.19289v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.19289v3-abstract-full" style="display: none;"> Recent lightweight image captioning models using retrieved data mainly focus on text prompts. However, previous works only utilize the retrieved text as text prompts, and the visual information relies only on the CLIP visual embedding. Because of this issue, there is a limitation that the image descriptions inherent in the prompt are not sufficiently reflected in the visual embedding space. To tackle this issue, we propose ViPCap, a novel retrieval text-based visual prompt for lightweight image captioning. ViPCap leverages the retrieved text with image information as visual prompts to enhance the ability of the model to capture relevant visual information. By mapping text prompts into the CLIP space and generating multiple randomized Gaussian distributions, our method leverages sampling to explore randomly augmented distributions and effectively retrieves the semantic features that contain image information. These retrieved features are integrated into the image and designated as the visual prompt, leading to performance improvements on the datasets such as COCO, Flickr30k, and NoCaps. Experimental results demonstrate that ViPCap significantly outperforms prior lightweight captioning models in efficiency and effectiveness, demonstrating the potential for a plug-and-play solution. The source code is available at https://github.com/taewhankim/VIPCAP. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19289v3-abstract-full').style.display = 'none'; document.getElementById('2412.19289v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted to AAAI 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.19104">arXiv:2412.19104</a> <span> [<a href="https://arxiv.org/pdf/2412.19104">pdf</a>, <a href="https://arxiv.org/format/2412.19104">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Improving Generative Pre-Training: An In-depth Study of Masked Image Modeling and Denoising Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Choi%2C+H">Hyesong Choi</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Daeun Kim</a>, <a href="/search/cs?searchtype=author&query=Cha%2C+S">Sungmin Cha</a>, <a href="/search/cs?searchtype=author&query=Yi%2C+K+M">Kwang Moo Yi</a>, <a href="/search/cs?searchtype=author&query=Min%2C+D">Dongbo Min</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.19104v1-abstract-short" style="display: inline;"> In this work, we dive deep into the impact of additive noise in pre-training deep networks. While various methods have attempted to use additive noise inspired by the success of latent denoising diffusion models, when used in combination with masked image modeling, their gains have been marginal when it comes to recognition tasks. We thus investigate why this would be the case, in an attempt to fi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19104v1-abstract-full').style.display = 'inline'; document.getElementById('2412.19104v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.19104v1-abstract-full" style="display: none;"> In this work, we dive deep into the impact of additive noise in pre-training deep networks. While various methods have attempted to use additive noise inspired by the success of latent denoising diffusion models, when used in combination with masked image modeling, their gains have been marginal when it comes to recognition tasks. We thus investigate why this would be the case, in an attempt to find effective ways to combine the two ideas. Specifically, we find three critical conditions: corruption and restoration must be applied within the encoder, noise must be introduced in the feature space, and an explicit disentanglement between noised and masked tokens is necessary. By implementing these findings, we demonstrate improved pre-training performance for a wide range of recognition tasks, including those that require fine-grained, high-frequency information to solve. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19104v1-abstract-full').style.display = 'none'; document.getElementById('2412.19104v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18750">arXiv:2412.18750</a> <span> [<a href="https://arxiv.org/pdf/2412.18750">pdf</a>, <a href="https://arxiv.org/format/2412.18750">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> The Impact of Input Order Bias on Large Language Models for Software Fault Localization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rafi%2C+M+N">Md Nakhla Rafi</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D+J">Dong Jae Kim</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+T">Tse-Hsun Chen</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shaowei Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18750v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) show great promise in software engineering tasks like Fault Localization (FL) and Automatic Program Repair (APR). This study examines how input order and context size affect LLM performance in FL, a key step for many downstream software engineering tasks. We test different orders for methods using Kendall Tau distances, including "perfect" (where ground truths come fir… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18750v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18750v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18750v1-abstract-full" style="display: none;"> Large Language Models (LLMs) show great promise in software engineering tasks like Fault Localization (FL) and Automatic Program Repair (APR). This study examines how input order and context size affect LLM performance in FL, a key step for many downstream software engineering tasks. We test different orders for methods using Kendall Tau distances, including "perfect" (where ground truths come first) and "worst" (where ground truths come last). Our results show a strong bias in order, with Top-1 accuracy falling from 57\% to 20\% when we reverse the code order. Breaking down inputs into smaller contexts helps reduce this bias, narrowing the performance gap between perfect and worst orders from 22\% to just 1\%. We also look at ordering methods based on traditional FL techniques and metrics. Ordering using DepGraph's ranking achieves 48\% Top-1 accuracy, better than more straightforward ordering approaches like CallGraph. These findings underscore the importance of how we structure inputs, manage contexts, and choose ordering methods to improve LLM performance in FL and other software engineering tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18750v1-abstract-full').style.display = 'none'; document.getElementById('2412.18750v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=kim%2C+D&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=kim%2C+D&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=kim%2C+D&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=kim%2C+D&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=kim%2C+D&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=kim%2C+D&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository