CINXE.COM

<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> <head> <title>Computer Science </title> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="apple-touch-icon" sizes="180x180" href="/static/browse/0.3.4/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="/static/browse/0.3.4/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="/static/browse/0.3.4/images/icons/favicon-16x16.png"> <link rel="manifest" href="/static/browse/0.3.4/images/icons/site.webmanifest"> <link rel="mask-icon" href="/static/browse/0.3.4/images/icons/safari-pinned-tab.svg" color="#5bbad5"> <meta name="msapplication-TileColor" content="#da532c"> <meta name="theme-color" content="#ffffff"> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/arXiv.css?v=20240822" /> <link rel="stylesheet" type="text/css" media="print" href="/static/browse/0.3.4/css/arXiv-print.css?v=20200611" /> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/browse_search.css" /> <script language="javascript" src="/static/browse/0.3.4/js/accordion.js"></script> <script src="/static/browse/0.3.4/js/mathjaxToggle.min.js" type="text/javascript"></script> <script type="text/javascript" language="javascript">mathjaxToggle();</script> </head> <body class="with-cu-identity"> <div class="flex-wrap-footer"> <header> <a href="#content" class="is-sr-only">Skip to main content</a>  <div class="columns is-vcentered is-hidden-mobile" id="cu-identity"> <div class="column" id="cu-logo"> <a href="https://www.cornell.edu/"><img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University" /></a> </div><div class="column" id="support-ack"> <span id="support-ack-url">We 
gratefully acknowledge support from the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.</span> <a href="https://info.arxiv.org/about/donate.html" class="btn-header-donate">Donate</a> </div> </div> <div id="header" class="is-hidden-mobile"> <a aria-hidden="true" tabindex="-1" href="/IgnoreMe"></a> <div class="header-breadcrumbs"> <a href="/"><img src="/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" style="height:40px;"/></a> <span>></span> <a href="/list/cs/recent">cs</a> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <div 
class="mobile-header"> <div class="columns is-mobile"> <div class="column logo-arxiv"><a href="https://arxiv.org/"><img src="/static/browse/0.3.4/images/arxiv-logomark-small-white.svg" alt="arXiv logo" style="height:60px;" /></a></div> <div class="column logo-cornell"><a href="https://www.cornell.edu/"> <picture> <source media="(min-width: 501px)" srcset="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg 400w" sizes="400w" /> <source srcset="/static/browse/0.3.4/images/icons/cu/cornell_seal_simple_black.svg 2x" /> <img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University Logo" /> </picture> </a></div> <div class="column nav" id="toggle-container" role="menubar"> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-white"><title>open search</title><path d="M505 442.7L405.3 343c-4.5-4.5-10.6-7-17-7H372c27.6-35.3 44-79.7 44-128C416 93.1 322.9 0 208 0S0 93.1 0 208s93.1 208 208 208c48.3 0 92.7-16.4 128-44v16.3c0 6.4 2.5 12.5 7 17l99.7 99.7c9.4 9.4 24.6 9.4 33.9 0l28.3-28.3c9.4-9.4 9.4-24.6.1-34zM208 336c-70.7 0-128-57.2-128-128 0-70.7 57.2-128 128-128 70.7 0 128 57.2 128 128 0 70.7-57.2 128-128 128z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <form class="mobile-search-form" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <input class="input" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Computer Science</h1> <ul> <li><a href="#item0">New submissions</a></li> <li><a href="#item363">Cross-lists</a></li> <li><a href="#item423">Replacements</a></li> </ul> <p>See <a id="recent-cs" aria-labelledby="recent-cs" href="/list/cs/recent">recent</a> articles</p> <h3>Showing new listings for Friday, 22 November 2024</h3> <div class='paging'>Total of 669 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/cs/new?skip=0&amp;show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> <dl id='articles'> <h3>New submissions (showing 362 of 362 entries)</h3> <dt> <a name='item1'>[1]</a> <a href ="/abs/2411.13560" title="Abstract" id="2411.13560"> arXiv:2411.13560 </a> [<a href="/pdf/2411.13560" 
title="Download PDF" id="pdf-2411.13560" aria-labelledby="pdf-2411.13560">pdf</a>, <a href="https://arxiv.org/html/2411.13560v1" title="View HTML" id="html-2411.13560" aria-labelledby="html-2411.13560" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13560" title="Other formats" id="oth-2411.13560" aria-labelledby="oth-2411.13560">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> AMSnet-KG: A Netlist Dataset for LLM-based AMS Circuit Auto-Design Using Knowledge Graph RAG </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+Y">Yichen Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tao,+Z">Zhuofu Tao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+Y">Yuhao Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+T">Tianjia Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+C">Cheng Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yaxing Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+B">Bingyu Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+G">Genhao Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+A">Alvin Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+Z">Zhiping Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+T">Ting-Jung Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+L">Lei He</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Hardware Architecture (cs.AR); Emerging Technologies (cs.ET); Signal Processing (eess.SP) </div> <p class='mathjax'> High-performance analog and mixed-signal (AMS) circuits are mainly full-custom designed, which is time-consuming 
and labor-intensive. A significant portion of the effort is experience-driven, which makes the automation of AMS circuit design a formidable challenge. Large language models (LLMs) have emerged as powerful tools for Electronic Design Automation (EDA) applications, fostering advancements in the automatic design process for large-scale AMS circuits. However, the absence of high-quality datasets has led to issues such as model hallucination, which undermines the robustness of automatically generated circuit designs. To address this issue, this paper introduces AMSnet-KG, a dataset encompassing various AMS circuit schematics and netlists. We construct a knowledge graph with annotations on detailed functional and performance characteristics. Facilitated by AMSnet-KG, we propose an automated AMS circuit generation framework that utilizes the comprehensive knowledge embedded in LLMs. We first formulate a design strategy (e.g., circuit architecture using a number of circuit components) based on required specifications. Next, matched circuit components are retrieved and assembled into a complete topology, and transistor sizing is obtained through Bayesian optimization. Simulation results of the netlist are fed back to the LLM for further topology refinement, ensuring the circuit design specifications are met. We perform case studies of operational amplifier and comparator design to verify the automatic design flow from specifications to netlists with minimal human effort. The dataset used in this paper will be open-sourced upon publishing of this paper. 
</p> </div> </dd> <dt> <a name='item2'>[2]</a> <a href ="/abs/2411.13561" title="Abstract" id="2411.13561"> arXiv:2411.13561 </a> [<a href="/pdf/2411.13561" title="Download PDF" id="pdf-2411.13561" aria-labelledby="pdf-2411.13561">pdf</a>, <a href="https://arxiv.org/html/2411.13561v1" title="View HTML" id="html-2411.13561" aria-labelledby="html-2411.13561" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13561" title="Other formats" id="oth-2411.13561" aria-labelledby="oth-2411.13561">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Model discovery on the fly using continuous data assimilation </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Newey,+J">Joshua Newey</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Whitehead,+J+P">Jared P Whitehead</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Carlson,+E">Elizabeth Carlson</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span>; Dynamical Systems (math.DS); Data Analysis, Statistics and Probability (physics.data-an) </div> <p class='mathjax'> We review an algorithm developed for parameter estimation within the Continuous Data Assimilation (CDA) approach. We present an alternative derivation for the algorithm presented in a paper by Carlson, Hudson, and Larios (CHL, 2021). This derivation relies on the same assumptions as the previous derivation but frames the problem as a finite dimensional root-finding problem. Within the approach we develop, the algorithm developed in (CHL, 2021) is simply a realization of Newton's method. 
We then consider implementing other derivative-based optimization algorithms; we show that the Levenberg-Marquardt algorithm has similar performance to the CHL algorithm in the single parameter estimation case and generalizes much better to fitting multiple parameters. We then implement these methods in three example systems: the Lorenz '63 model, the two-layer Lorenz '96 model, and the Kuramoto-Sivashinsky equation. </p> </div> </dd> <dt> <a name='item3'>[3]</a> <a href ="/abs/2411.13566" title="Abstract" id="2411.13566"> arXiv:2411.13566 </a> [<a href="/pdf/2411.13566" title="Download PDF" id="pdf-2411.13566" aria-labelledby="pdf-2411.13566">pdf</a>, <a href="https://arxiv.org/html/2411.13566v1" title="View HTML" id="html-2411.13566" aria-labelledby="html-2411.13566" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13566" title="Other formats" id="oth-2411.13566" aria-labelledby="oth-2411.13566">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Integrated Water Resource Management in the Segura Hydrographic Basin: An Artificial Intelligence Approach </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Otamendi,+U">Urtzi Otamendi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Maiza,+M">Mikel Maiza</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Olaizola,+I+G">Igor G. 
Olaizola</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sierra,+B">Basilio Sierra</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Flores,+M">Markel Flores</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Quartulli,+M">Marco Quartulli</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 15 pages, 14 figures, 8 tables </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Journal of Environmental Management, Volume 370, 2024, ISSN 0301-4797 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span> </div> <p class='mathjax'> Managing resources effectively in uncertain demand, variable availability, and complex governance policies is a significant challenge. This paper presents a paradigmatic framework for addressing these issues in water management scenarios by integrating advanced physical modelling, remote sensing techniques, and Artificial Intelligence algorithms. The proposed approach accurately predicts water availability, estimates demand, and optimizes resource allocation on both short- and long-term basis, combining a comprehensive hydrological model, agronomic crop models for precise demand estimation, and Mixed-Integer Linear Programming for efficient resource distribution. In the study case of the Segura Hydrographic Basin, the approach successfully allocated approximately 642 million cubic meters ($hm^3$) of water over six months, minimizing the deficit to 9.7% of the total estimated demand. The methodology demonstrated significant environmental benefits, reducing CO2 emissions while optimizing resource distribution. This robust solution supports informed decision-making processes, ensuring sustainable water management across diverse contexts. 
The generalizability of this approach allows its adaptation to other basins, contributing to improved governance and policy implementation on a broader scale. Ultimately, the methodology has been validated and integrated into the operational water management practices in the Segura Hydrographic Basin in Spain. </p> </div> </dd> <dt> <a name='item4'>[4]</a> <a href ="/abs/2411.13569" title="Abstract" id="2411.13569"> arXiv:2411.13569 </a> [<a href="/pdf/2411.13569" title="Download PDF" id="pdf-2411.13569" aria-labelledby="pdf-2411.13569">pdf</a>, <a href="https://arxiv.org/html/2411.13569v1" title="View HTML" id="html-2411.13569" aria-labelledby="html-2411.13569" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13569" title="Other formats" id="oth-2411.13569" aria-labelledby="oth-2411.13569">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unconditionally stable symplectic integrators for the Navier-Stokes equations and other dissipative systems </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Sungkeetanon,+S">Sutthikiat Sungkeetanon</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Gaglione,+J+S">Joseph S. Gaglione</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Chapman,+R+L">Robert L. Chapman</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Kelly,+T+M">Tyler M. Kelly</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Cushman,+H+A">Howard A. Cushman</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Odom,+B+H">Blakeley H. Odom</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=MacGavin,+B">Bryan MacGavin</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Elamin,+G+A">Gafar A. Elamin</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Washuta,+N+J">Nathan J. 
Washuta</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Crosmer,+J+E">Jonathan E. Crosmer</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=DeVoria,+A+C">Adam C. DeVoria</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Sanders,+J+W">John W. Sanders</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages, 7 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span>; Dynamical Systems (math.DS); Fluid Dynamics (physics.flu-dyn) </div> <p class='mathjax'> Symplectic integrators offer vastly superior performance over traditional numerical techniques for conservative dynamical systems, but their application to \emph{dissipative} systems is inherently difficult due to dissipative systems' lack of symplectic structure. Leveraging the intrinsic variational structure of higher-order dynamics, this paper presents a general technique for applying existing symplectic integration schemes to dissipative systems, with particular emphasis on viscous fluids modeled by the Navier-Stokes equations. Two very simple such schemes are developed here. Not only are these schemes unconditionally stable for dissipative systems, they also outperform traditional methods with a similar degree of complexity in terms of accuracy for a given time step. For example, in the case of viscous flow between two infinite, flat plates, one of the schemes developed here is found to outperform both the implicit Euler method and the explicit fourth-order Runge-Kutta method in predicting the velocity profile. To the authors' knowledge, this is the very first time that a symplectic integration scheme has been applied successfully to the Navier-Stokes equations. 
We interpret the present success as direct empirical validation of the canonical Hamiltonian formulation of the Navier-Stokes problem recently published by Sanders~\emph{et al.} More sophisticated symplectic integration schemes are expected to exhibit even greater performance. It is hoped that these results will lead to improved numerical methods in computational fluid dynamics. </p> </div> </dd> <dt> <a name='item5'>[5]</a> <a href ="/abs/2411.13571" title="Abstract" id="2411.13571"> arXiv:2411.13571 </a> [<a href="/pdf/2411.13571" title="Download PDF" id="pdf-2411.13571" aria-labelledby="pdf-2411.13571">pdf</a>, <a href="https://arxiv.org/html/2411.13571v1" title="View HTML" id="html-2411.13571" aria-labelledby="html-2411.13571" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13571" title="Other formats" id="oth-2411.13571" aria-labelledby="oth-2411.13571">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A low-rank balanced truncation approach for large-scale RLCk model order reduction based on extended Krylov subspace and a frequency-aware convergence criterion </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Giamouzis,+C">Christos Giamouzis</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Garyfallou,+D">Dimitrios Garyfallou</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Evmorfopoulos,+N">Nestor Evmorfopoulos</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Stamoulis,+G">George Stamoulis</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> arXiv admin note: substantial text overlap with <a href="https://arxiv.org/abs/2311.08478" data-arxiv-id="2311.08478" class="link-https">arXiv:2311.08478</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span>; 
Hardware Architecture (cs.AR); Computational Engineering, Finance, and Science (cs.CE) </div> <p class='mathjax'> Model order reduction (MOR) is essential in integrated circuit design, particularly when dealing with large-scale electromagnetic models extracted from complex designs. The numerous passive elements introduced in these models pose significant challenges in the simulation process. MOR methods based on balanced truncation (BT) help address these challenges by producing compact reduced-order models (ROMs) that preserve the original model's input-output port behavior. In this work, we present an extended Krylov subspace-based BT approach with a frequency-aware convergence criterion and efficient implementation techniques for reducing large-scale models. Experimental results indicate that our method generates accurate and compact ROMs while achieving up to x22 smaller ROMs with similar accuracy compared to ANSYS RaptorX ROMs for large-scale benchmarks. </p> </div> </dd> <dt> <a name='item6'>[6]</a> <a href ="/abs/2411.13572" title="Abstract" id="2411.13572"> arXiv:2411.13572 </a> [<a href="/pdf/2411.13572" title="Download PDF" id="pdf-2411.13572" aria-labelledby="pdf-2411.13572">pdf</a>, <a href="https://arxiv.org/html/2411.13572v1" title="View HTML" id="html-2411.13572" aria-labelledby="html-2411.13572" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13572" title="Other formats" id="oth-2411.13572" aria-labelledby="oth-2411.13572">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Public Health Advocacy Dataset: A Dataset of Tobacco Usage Videos from Social Media </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chappa,+N+V+R">Naga VS Raviteja Chappa</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=McCormick,+C">Charlotte McCormick</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Gongora,+S+R">Susana Rodriguez Gongora</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dobbs,+P+D">Page Daniel Dobbs</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luu,+K">Khoa Luu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Under review at International Journal of Computer Vision (IJCV); 29 figures, 5 figures; </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> The Public Health Advocacy Dataset (PHAD) is a comprehensive collection of 5,730 videos related to tobacco products sourced from social media platforms like TikTok and YouTube. This dataset encompasses 4.3 million frames and includes detailed metadata such as user engagement metrics, video descriptions, and search keywords. This is the first dataset with these features providing a valuable resource for analyzing tobacco-related content and its impact. Our research employs a two-stage classification approach, incorporating a Vision-Language (VL) Encoder, demonstrating superior performance in accurately categorizing various types of tobacco products and usage scenarios. The analysis reveals significant user engagement trends, particularly with vaping and e-cigarette content, highlighting areas for targeted public health interventions. The PHAD addresses the need for multi-modal data in public health research, offering insights that can inform regulatory policies and public health strategies. This dataset is a crucial step towards understanding and mitigating the impact of tobacco usage, ensuring that public health efforts are more inclusive and effective. 
</p> </div> </dd> <dt> <a name='item7'>[7]</a> <a href ="/abs/2411.13573" title="Abstract" id="2411.13573"> arXiv:2411.13573 </a> [<a href="/pdf/2411.13573" title="Download PDF" id="pdf-2411.13573" aria-labelledby="pdf-2411.13573">pdf</a>, <a href="/format/2411.13573" title="Other formats" id="oth-2411.13573" aria-labelledby="oth-2411.13573">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Higher-Order Spectral Element Methods for Electromagnetic Modeling of Complex Anisotropic Waveguides </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Ribeiro,+R+O">Raul Oliveira Ribeiro</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Ph.D. Thesis in Electrical Engineering at the Pontifical Catholic University of Rio de Janeiro </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Maxwell, PUC-Rio, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span>; Computational Engineering, Finance, and Science (cs.CE) </div> <p class='mathjax'> This research thesis presents a novel higher-order spectral element method (SEM) formulated in cylindrical coordinates for analyzing electromagnetic fields in waveguides filled with complex anisotropic media. In this study, we consider a large class of cylindrical waveguides: radially-bounded and radially-unbounded domains; homogeneous and inhomogeneous waveguides; concentric and non-concentric geometries; Hermitian and non-Hermitian anisotropic media tensors. This work explores different wave equation formulations for one-layer eccentric and multilayer cylindrical waveguides. 
For the first case, we can define a new normalized scalar Helmholtz equation for decoupling TM and TE modes, and for the second, a vectorial Helmholtz equation for hybrid modes in multilayered anisotropic structures. Additionally, we formulate a transformation optics (TO) framework to include non-symmetric and non-Hermitian media tensors for non-concentric multilayer waveguides. Lastly, we model excitation sources for logging sensors applied in geophysical problems using the fields obtained by SEM. We validate the proposed approach against analytical solutions, perturbation-based and mode-matching-based methods, finite-elements, and finite-integration numerical methods. Our technique obtains accurate results with fewer elements and degrees of freedom (DoF) than Cartesian-based SEM and ordinary finite-element approaches. To this end, we use higher-order two-dimensional basis functions associated with the zeros of the completed Lobatto polynomial to model the fields in each reference element. The convergence analysis demonstrates the absence of the Runge effect as the expansion order increases. Numerical results show that our formulation is efficient and accurate for modeling cylindrical waveguided geometries filled with complex media. 
</p> </div> </dd> <dt> <a name='item8'>[8]</a> <a href ="/abs/2411.13578" title="Abstract" id="2411.13578"> arXiv:2411.13578 </a> [<a href="/pdf/2411.13578" title="Download PDF" id="pdf-2411.13578" aria-labelledby="pdf-2411.13578">pdf</a>, <a href="https://arxiv.org/html/2411.13578v1" title="View HTML" id="html-2411.13578" aria-labelledby="html-2411.13578" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13578" title="Other formats" id="oth-2411.13578" aria-labelledby="oth-2411.13578">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> COOD: Concept-based Zero-shot OOD Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zhendong Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nian,+Y">Yi Nian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zou,+H+P">Henry Peng Zou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+L">Li Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+X">Xiyang Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Y">Yue Zhao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> How can models effectively detect out-of-distribution (OOD) samples in complex, multi-label settings without extensive retraining? Existing OOD detection methods struggle to capture the intricate semantic relationships and label co-occurrences inherent in multi-label settings, often requiring large amounts of training data and failing to generalize to unseen label combinations. 
While large language models have revolutionized zero-shot OOD detection, they primarily focus on single-label scenarios, leaving a critical gap in handling real-world tasks where samples can be associated with multiple interdependent labels. To address these challenges, we introduce COOD, a novel zero-shot multi-label OOD detection framework. COOD leverages pre-trained vision-language models, enhancing them with a concept-based label expansion strategy and a new scoring function. By enriching the semantic space with both positive and negative concepts for each label, our approach models complex label dependencies, precisely differentiating OOD samples without the need for additional training. Extensive experiments demonstrate that our method significantly outperforms existing approaches, achieving approximately 95% average AUROC on both VOC and COCO datasets, while maintaining robust performance across varying numbers of labels and different types of OOD samples. </p> </div> </dd> <dt> <a name='item9'>[9]</a> <a href ="/abs/2411.13580" title="Abstract" id="2411.13580"> arXiv:2411.13580 </a> [<a href="/pdf/2411.13580" title="Download PDF" id="pdf-2411.13580" aria-labelledby="pdf-2411.13580">pdf</a>, <a href="/format/2411.13580" title="Other formats" id="oth-2411.13580" aria-labelledby="oth-2411.13580">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Multi-Server Information-Sharing Environment for Cross-Party Collaboration on A Private Cloud </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+J">Jianping Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Q">Qiang Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+Z">Zhenzhong Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+J">Jiarui Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+F">Fangqiang Yu</a></div> 
<div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Automation in Construction,2017 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Distributed, Parallel, and Cluster Computing (cs.DC) </div> <p class='mathjax'> Interoperability remains the key problem in multi-discipline collaboration based on building information modeling (BIM). Although various methods have been proposed to solve the technical issues of interoperability, such as data sharing and data consistency; organizational issues, including data ownership and data privacy, remain unresolved to date. These organizational issues prevent different stakeholders from sharing their data due to concerns regarding losing control of the data. This study proposes a multi-server information-sharing approach on a private cloud after analyzing the requirements for cross-party collaboration to address the aforementioned issues and prepare for massive data handling in the near future. This approach adopts a global controller to track the location, ownership and privacy of the data, which are stored in different servers that are controlled by different parties. Furthermore, data consistency conventions, parallel sub-model extraction, and sub-model integration with model verification are investigated in depth to support information sharing in a distributed environment and to maintain data consistency. Thus, with this approach, the ownership and privacy of the data can be controlled by its owner while still enabling certain required data to be shared with other parties. Application of the multi-server approach for information interoperability and cross-party collaboration is illustrated using a real construction project of an airport terminal. 
Validation shows that the proposed approach is feasible for maintaining the ownership and privacy of the data while supporting cross-party data sharing and collaboration at the same time, thus avoiding possible legal problems regarding data copyrights or other legal issues. </p> </div> </dd> <dt> <a name='item10'>[10]</a> <a href ="/abs/2411.13581" title="Abstract" id="2411.13581"> arXiv:2411.13581 </a> [<a href="/pdf/2411.13581" title="Download PDF" id="pdf-2411.13581" aria-labelledby="pdf-2411.13581">pdf</a>, <a href="/format/2411.13581" title="Other formats" id="oth-2411.13581" aria-labelledby="oth-2411.13581">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Browser Extension for Fake URL Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Malik,+L+G">Latesh G. Malik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shambharkar,+R">Rohini Shambharkar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Morey,+S">Shivam Morey</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kanpate,+S">Shubhlak Kanpate</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Raut,+V">Vedika Raut</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5 Pages, 2 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Computational Engineering, Finance, and Science (cs.CE); Computers and Society (cs.CY); Machine Learning (cs.LG) </div> <p class='mathjax'> In recent years, Cyber attacks have increased in number, and with them, the intensity of the attacks and their potential to damage the user have also increased significantly. In an ever-advancing world, users find it difficult to keep up with the latest developments in technology, which can leave them vulnerable to attacks. 
To avoid such situations we need tools to deter such attacks, for this machine learning models are among the best options. This paper presents a Browser Extension that uses machine learning models to enhance online security by integrating three crucial functionalities: Malicious URL detection, Spam Email detection and Network logs analysis. The proposed solution uses LGBM classifier for classification of Phishing websites, the model has been trained on a dataset with 87 features, this model achieved an accuracy of 96.5% with a precision of 96.8% and F1 score of 96.49%. The Model for Spam email detection uses Multinomial NB algorithm which has been trained on a dataset with over 5500 messages, this model achieved an accuracy of 97.09% with a precision of 100%. The results demonstrate the effectiveness of using machine learning models for cyber security. </p> </div> </dd> <dt> <a name='item11'>[11]</a> <a href ="/abs/2411.13582" title="Abstract" id="2411.13582"> arXiv:2411.13582 </a> [<a href="/pdf/2411.13582" title="Download PDF" id="pdf-2411.13582" aria-labelledby="pdf-2411.13582">pdf</a>, <a href="https://arxiv.org/html/2411.13582v1" title="View HTML" id="html-2411.13582" aria-labelledby="html-2411.13582" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13582" title="Other formats" id="oth-2411.13582" aria-labelledby="oth-2411.13582">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Deep Feature Response Discriminative Calibration </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+W">Wenxiang Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qiu,+T">Tian Qiu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+L">Linyun Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Feng,+Z">Zunlei Feng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+M">Mingli 
Song</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Huiqiong Wang</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Neurocomputing 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Deep neural networks (DNNs) have numerous applications across various domains. Several optimization techniques, such as ResNet and SENet, have been proposed to improve model accuracy. These techniques improve the model performance by adjusting or calibrating feature responses according to a uniform standard. However, they lack the discriminative calibration for different features, thereby introducing limitations in the model output. Therefore, we propose a method that discriminatively calibrates feature responses. The preliminary experimental results indicate that the neural feature response follows a Gaussian distribution. Consequently, we compute confidence values by employing the Gaussian probability density function, and then integrate these values with the original response values. The objective of this integration is to improve the feature discriminability of the neural feature response. Based on the calibration values, we propose a plugin-based calibration module incorporated into a modified ResNet architecture, termed Response Calibration Networks (ResCNet). Extensive experiments on datasets like CIFAR-10, CIFAR-100, SVHN, and ImageNet demonstrate the effectiveness of the proposed approach. The developed code is publicly available at <a href="https://github.com/tcmyxc/ResCNet" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item12'>[12]</a> <a href ="/abs/2411.13583" title="Abstract" id="2411.13583"> arXiv:2411.13583 </a> [<a href="/pdf/2411.13583" title="Download PDF" id="pdf-2411.13583" aria-labelledby="pdf-2411.13583">pdf</a>, <a href="https://arxiv.org/html/2411.13583v1" title="View HTML" id="html-2411.13583" aria-labelledby="html-2411.13583" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13583" title="Other formats" id="oth-2411.13583" aria-labelledby="oth-2411.13583">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Enhanced FIWARE-Based Architecture for Cyberphysical Systems With Tiny Machine Learning and Machine Learning Operations: A Case Study on Urban Mobility Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Conde,+J">Javier Conde</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Munoz-Arcentales,+A">Andrés Munoz-Arcentales</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Alonso,+%C3%81">Álvaro Alonso</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Salvach%C3%BAa,+J">Joaquín Salvachúa</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huecas,+G">Gabriel Huecas</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> IT Professional ( Volume: 26, Issue: 5, Sept.-Oct. 2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Artificial Intelligence (cs.AI); Distributed, Parallel, and Cluster Computing (cs.DC); Networking and Internet Architecture (cs.NI) </div> <p class='mathjax'> The rise of AI and the Internet of Things is accelerating the digital transformation of society. Mobility computing presents specific barriers due to its real-time requirements, decentralization, and connectivity through wireless networks. 
New research on edge computing and tiny machine learning (tinyML) explores the execution of AI models on low-performance devices to address these issues. However, there are not many studies proposing agnostic architectures that manage the entire lifecycle of intelligent cyberphysical systems. This article extends a previous architecture based on FIWARE software components to implement the machine learning operations flow, enabling the management of the entire tinyML lifecycle in cyberphysical systems. We also provide a use case to showcase how to implement the FIWARE architecture through a complete example of a smart traffic system. We conclude that the FIWARE ecosystem constitutes a real reference option for developing tinyML and edge computing in cyberphysical systems. </p> </div> </dd> <dt> <a name='item13'>[13]</a> <a href ="/abs/2411.13584" title="Abstract" id="2411.13584"> arXiv:2411.13584 </a> [<a href="/pdf/2411.13584" title="Download PDF" id="pdf-2411.13584" aria-labelledby="pdf-2411.13584">pdf</a>, <a href="https://arxiv.org/html/2411.13584v1" title="View HTML" id="html-2411.13584" aria-labelledby="html-2411.13584" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13584" title="Other formats" id="oth-2411.13584" aria-labelledby="oth-2411.13584">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> AddrLLM: Address Rewriting via Large Language Model on Nationwide Logistics Data </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Q">Qinchen Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hong,+Z">Zhiqing Hong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+D">Dongjiang Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Haotian Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+Z">Zejun Xie</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=He,+T">Tian He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yunhuai Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Y">Yu Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+D">Desheng Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by KDD'25 ADS Track </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Textual description of a physical location, commonly known as an address, plays an important role in location-based services (LBS) such as on-demand delivery and navigation. However, the prevalence of abnormal addresses, those containing inaccuracies that fail to pinpoint a location, has led to significant costs. Address rewriting has emerged as a solution to rectify these abnormal addresses. Despite the critical need, existing address rewriting methods are limited, typically tailored to correct specific error types, or frequently require retraining to process new address data effectively. In this study, we introduce AddrLLM, an innovative framework for address rewriting that is built upon a retrieval augmented large language model. AddrLLM overcomes the aforementioned limitations through a meticulously designed Supervised Fine-Tuning module, an Address-centric Retrieval Augmented Generation module and a Bias-free Objective Alignment module. To the best of our knowledge, this study pioneers the application of an LLM-based address rewriting approach to solve the issue of abnormal addresses. Through comprehensive offline testing with real-world data on a national scale and subsequent online deployment, AddrLLM has demonstrated superior performance in integration with existing logistics systems. 
It has significantly decreased the rate of parcel re-routing by approximately 43%, underscoring its exceptional efficacy in real-world applications. </p> </div> </dd> <dt> <a name='item14'>[14]</a> <a href ="/abs/2411.13585" title="Abstract" id="2411.13585"> arXiv:2411.13585 </a> [<a href="/pdf/2411.13585" title="Download PDF" id="pdf-2411.13585" aria-labelledby="pdf-2411.13585">pdf</a>, <a href="/format/2411.13585" title="Other formats" id="oth-2411.13585" aria-labelledby="oth-2411.13585">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Artificial Intelligence in Cybersecurity: Building Resilient Cyber Diplomacy Frameworks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Stoltz,+M">Michael Stoltz</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Artificial Intelligence (cs.AI); Computers and Society (cs.CY) </div> <p class='mathjax'> This paper explores how automation and artificial intelligence (AI) are transforming U.S. cyber diplomacy. Leveraging these technologies helps the U.S. manage the complexity and urgency of cyber diplomacy, improving decision-making, efficiency, and security. As global interconnectivity grows, cyber diplomacy, managing national interests in the digital space, has become vital. The ability of AI and automation to quickly process vast data volumes enables timely responses to cyber threats and opportunities. This paper underscores the strategic integration of these tools to maintain U.S. competitive advantage and secure national interests. Automation enhances diplomatic communication and data processing, freeing diplomats to focus on strategic decisions. AI supports predictive analytics and real-time decision-making, offering critical insights and proactive measures during high-stakes engagements. 
Case studies show AI's effectiveness in monitoring cyber activities and managing international cyber policy. Challenges such as ethical concerns, security vulnerabilities, and reliance on technology are also addressed, emphasizing human oversight and strong governance frameworks. Ensuring proper ethical guidelines and cybersecurity measures allows the U.S. to harness the benefits of automation and AI while mitigating risks. By adopting these technologies, U.S. cyber diplomacy can become more proactive and effective, navigating the evolving digital landscape with greater agility. </p> </div> </dd> <dt> <a name='item15'>[15]</a> <a href ="/abs/2411.13587" title="Abstract" id="2411.13587"> arXiv:2411.13587 </a> [<a href="/pdf/2411.13587" title="Download PDF" id="pdf-2411.13587" aria-labelledby="pdf-2411.13587">pdf</a>, <a href="https://arxiv.org/html/2411.13587v1" title="View HTML" id="html-2411.13587" aria-labelledby="html-2411.13587" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13587" title="Other formats" id="oth-2411.13587" aria-labelledby="oth-2411.13587">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Exploring the Adversarial Vulnerabilities of Vision-Language-Action Models in Robotics </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+T">Taowen Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+D">Dongfang Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+J+C">James Chenhao Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+W">Wenhao Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Q">Qifan Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Han,+C">Cheng Han</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+J">Jiebo Luo</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+R">Ruixiang Tang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Recently in robotics, Vision-Language-Action (VLA) models have emerged as a transformative approach, enabling robots to execute complex tasks by integrating visual and linguistic inputs within an end-to-end learning framework. While VLA models offer significant capabilities, they also introduce new attack surfaces, making them vulnerable to adversarial attacks. With these vulnerabilities largely unexplored, this paper systematically quantifies the robustness of VLA-based robotic systems. Recognizing the unique demands of robotic execution, our attack objectives target the inherent spatial and functional characteristics of robotic systems. In particular, we introduce an untargeted position-aware attack objective that leverages spatial foundations to destabilize robotic actions, and a targeted attack objective that manipulates the robotic trajectory. Additionally, we design an adversarial patch generation approach that places a small, colorful patch within the camera's view, effectively executing the attack in both digital and physical environments. Our evaluation reveals a marked degradation in task success rates, with up to a 100% reduction across a suite of simulated robotic tasks, highlighting critical security gaps in current VLA architectures. By unveiling these vulnerabilities and proposing actionable evaluation metrics, this work advances both the understanding and enhancement of safety for VLA-based robotic systems, underscoring the necessity for developing robust defense strategies prior to physical-world deployments. 
</p> </div> </dd> <dt> <a name='item16'>[16]</a> <a href ="/abs/2411.13588" title="Abstract" id="2411.13588"> arXiv:2411.13588 </a> [<a href="/pdf/2411.13588" title="Download PDF" id="pdf-2411.13588" aria-labelledby="pdf-2411.13588">pdf</a>, <a href="https://arxiv.org/html/2411.13588v1" title="View HTML" id="html-2411.13588" aria-labelledby="html-2411.13588" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13588" title="Other formats" id="oth-2411.13588" aria-labelledby="oth-2411.13588">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unveiling Redundancy in Diffusion Transformers (DiTs): A Systematic Study </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+X">Xibo Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fang,+J">Jiarui Fang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+A">Aoyu Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+J">Jinzhe Pan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages including reference </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The increased model capacity of Diffusion Transformers (DiTs) and the demand for generating higher resolutions of images and videos have led to a significant rise in inference latency, impacting real-time performance adversely. While prior research has highlighted the presence of high similarity in activation values between adjacent diffusion steps (referred to as redundancy) and proposed various caching mechanisms to mitigate computational overhead, the exploration of redundancy in existing literature remains limited, with findings often not generalizable across different DiT models. 
This study aims to address this gap by conducting a comprehensive investigation into redundancy across a broad spectrum of mainstream DiT models. Our experimental analysis reveals substantial variations in the distribution of redundancy across diffusion steps among different DiT models. Interestingly, within a single model, the redundancy distribution remains stable regardless of variations in input prompts, step counts, or scheduling strategies. Given the lack of a consistent pattern across diverse models, caching strategies designed for a specific group of models may not easily transfer to others. To overcome this challenge, we introduce a tool for analyzing the redundancy of individual models, enabling subsequent research to develop tailored caching strategies for specific model architectures. The project is publicly available at <a href="https://github.com/xdit-project/DiTCacheAnalysis" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item17'>[17]</a> <a href ="/abs/2411.13590" title="Abstract" id="2411.13590"> arXiv:2411.13590 </a> [<a href="/pdf/2411.13590" title="Download PDF" id="pdf-2411.13590" aria-labelledby="pdf-2411.13590">pdf</a>, <a href="https://arxiv.org/html/2411.13590v1" title="View HTML" id="html-2411.13590" aria-labelledby="html-2411.13590" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13590" title="Other formats" id="oth-2411.13590" aria-labelledby="oth-2411.13590">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Deep learning waterways for rural infrastructure development </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pierson,+M">Matthew Pierson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mehrabi,+Z">Zia Mehrabi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages, 6 figures 
</div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Surprisingly a number of Earth's waterways remain unmapped, with a significant number in low and middle income countries. Here we build a computer vision model (WaterNet) to learn the location of waterways in the United States, based on high resolution satellite imagery and digital elevation models, and then deploy this in novel environments in the African continent. Our outputs provide detail of waterways structures hereto unmapped. When assessed against community needs requests for rural bridge building related to access to schools, health care facilities and agricultural markets, we find these newly generated waterways capture on average 93% (country range: 88-96%) of these requests whereas Open Street Map, and the state of the art data from TDX-Hydro, capture only 36% (5-72%) and 62% (37%-85%), respectively. Because these new machine learning enabled maps are built on public and operational data acquisition this approach offers promise for capturing humanitarian needs and planning for social development in places where cartographic efforts have so far failed to deliver. The improved performance in identifying community needs missed by existing data suggests significant value for rural infrastructure development and better targeting of development interventions. 
</p> </div> </dd> <dt> <a name='item18'>[18]</a> <a href ="/abs/2411.13591" title="Abstract" id="2411.13591"> arXiv:2411.13591 </a> [<a href="/pdf/2411.13591" title="Download PDF" id="pdf-2411.13591" aria-labelledby="pdf-2411.13591">pdf</a>, <a href="https://arxiv.org/html/2411.13591v1" title="View HTML" id="html-2411.13591" aria-labelledby="html-2411.13591" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13591" title="Other formats" id="oth-2411.13591" aria-labelledby="oth-2411.13591">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Improved GUI Grounding via Iterative Narrowing </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+A">Anthony Nguyen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL) </div> <p class='mathjax'> GUI grounding, the task of identifying a precise location on an interface image from a natural language query, plays a crucial role in enhancing the capabilities of Vision-Language Model (VLM) agents. While general VLMs, such as GPT-4V, demonstrate strong performance across various tasks, their proficiency in GUI grounding remains suboptimal. Recent studies have focused on fine-tuning these models specifically for one-shot GUI grounding, yielding significant improvements over baseline performance. We introduce a visual prompting framework called Iterative Narrowing (IN) to further enhance the performance of both general and fine-tuned models in GUI grounding. For evaluation, we tested our method on a comprehensive benchmark comprising different UI platforms. 
</p> </div> </dd> <dt> <a name='item19'>[19]</a> <a href ="/abs/2411.13592" title="Abstract" id="2411.13592"> arXiv:2411.13592 </a> [<a href="/pdf/2411.13592" title="Download PDF" id="pdf-2411.13592" aria-labelledby="pdf-2411.13592">pdf</a>, <a href="https://arxiv.org/html/2411.13592v1" title="View HTML" id="html-2411.13592" aria-labelledby="html-2411.13592" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13592" title="Other formats" id="oth-2411.13592" aria-labelledby="oth-2411.13592">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Novel Speech Analysis and Correction Tool for Arabic-Speaking Children </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Berriche,+L">Lamia Berriche</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Driss,+M">Maha Driss</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Almuntashri,+A+A">Areej Ahmed Almuntashri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lghabi,+A+M">Asma Mufreh Lghabi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Almudhi,+H+S">Heba Saleh Almudhi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Almansour,+M+A">Munerah Abdul-Aziz Almansour</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> This paper introduces a new application named ArPA for Arabic kids who have trouble with pronunciation. Our application comprises two key components: the diagnostic module and the therapeutic module. 
The diagnostic process involves capturing the child's speech signal, preprocessing, and analyzing it using different machine learning classifiers like K-Nearest Neighbors (KNN), Support Vector Machine (SVM), and Decision Trees as well as deep neural network classifiers like ResNet18. The therapeutic module offers eye-catching gamified interfaces in which each correctly spoken letter earns a higher avatar level, providing positive reinforcement for the child's pronunciation improvement. Two datasets were used for experimental evaluation: one from a childcare centre and the other including Arabic alphabet pronunciation recordings. Our work uses a novel technique for speech recognition using Mel-Spectrogram and MFCC images. The results show that the ResNet18 classifier on speech-to-image converted data effectively identifies mispronunciations in Arabic speech with an accuracy of 99.015%, with Mel-Spectrogram images outperforming ResNet18 with MFCC images. </p> </div> </dd> <dt> <a name='item20'>[20]</a> <a href ="/abs/2411.13595" title="Abstract" id="2411.13595"> arXiv:2411.13595 </a> [<a href="/pdf/2411.13595" title="Download PDF" id="pdf-2411.13595" aria-labelledby="pdf-2411.13595">pdf</a>, <a href="/format/2411.13595" title="Other formats" id="oth-2411.13595" aria-labelledby="oth-2411.13595">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Accessible Learning: Deep Learning-Based Potential Dysgraphia Detection and OCR for Potentially Dysgraphic Handwriting </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=D,+V">Vydeki D</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bhandari,+D">Divyansh Bhandari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Patil,+P+P">Pranav Pratap Patil</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kulkarni,+A+A">Aarush Anand Kulkarni</a></div> <div 
class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Dysgraphia is a learning disorder that affects handwriting abilities, making it challenging for children to write legibly and consistently. Early detection and monitoring are crucial for providing timely support and interventions. This study applies deep learning techniques to address the dual tasks of dysgraphia detection and optical character recognition (OCR) on handwriting samples from children with potential dysgraphic symptoms. Using a dataset of handwritten samples from Malaysian schoolchildren, we developed a custom Convolutional Neural Network (CNN) model, alongside VGG16 and ResNet50, to classify handwriting as dysgraphic or non-dysgraphic. The custom CNN model outperformed the pre-trained models, achieving a test accuracy of 91.8% with high precision, recall, and AUC, demonstrating its robustness in identifying dysgraphic handwriting features. Additionally, an OCR pipeline was created to segment and recognize individual characters in dysgraphic handwriting, achieving a character recognition accuracy of approximately 43.5%. This research highlights the potential of deep learning in supporting dysgraphia assessment, laying a foundation for tools that could assist educators and clinicians in identifying dysgraphia and tracking handwriting progress over time. The findings contribute to advancements in assistive technologies for learning disabilities, offering hope for more accessible and accurate diagnostic tools in educational and clinical settings. 
</p> </div> </dd> <dt> <a name='item21'>[21]</a> <a href ="/abs/2411.13597" title="Abstract" id="2411.13597"> arXiv:2411.13597 </a> [<a href="/pdf/2411.13597" title="Download PDF" id="pdf-2411.13597" aria-labelledby="pdf-2411.13597">pdf</a>, <a href="https://arxiv.org/html/2411.13597v1" title="View HTML" id="html-2411.13597" aria-labelledby="html-2411.13597" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13597" title="Other formats" id="oth-2411.13597" aria-labelledby="oth-2411.13597">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Enhancing Bidirectional Sign Language Communication: Integrating YOLOv8 and NLP for Real-Time Gesture Recognition & Translation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bhuiyan,+H+J">Hasnat Jamil Bhuiyan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mozumder,+M+F">Mubtasim Fuad Mozumder</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Khan,+M+R+I">Md. Rabiul Islam Khan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ahmed,+M+S">Md. Sabbir Ahmed</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nahim,+N+Z">Nabuat Zaman Nahim</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The primary concern of this research is to take American Sign Language (ASL) data through real time camera footage and be able to convert the data and information into text. Adding to that, we are also putting focus on creating a framework that can also convert text into sign language in real time which can help us break the language barrier for the people who are in need. 
In this work, for recognising American Sign Language (ASL), we have used the You Only Look Once(YOLO) model and Convolutional Neural Network (CNN) model. YOLO model is run in real time and automatically extracts discriminative spatial-temporal characteristics from the raw video stream without the need for any prior knowledge, eliminating design flaws. The CNN model here is also run in real time for sign language detection. We have introduced a novel method for converting text based input to sign language by making a framework that will take a sentence as input, identify keywords from that sentence and then show a video where sign language is performed with respect to the sentence given as input in real time. To the best of our knowledge, this is a rare study to demonstrate bidirectional sign language communication in real time in the American Sign Language (ASL). </p> </div> </dd> <dt> <a name='item22'>[22]</a> <a href ="/abs/2411.13598" title="Abstract" id="2411.13598"> arXiv:2411.13598 </a> [<a href="/pdf/2411.13598" title="Download PDF" id="pdf-2411.13598" aria-labelledby="pdf-2411.13598">pdf</a>, <a href="https://arxiv.org/html/2411.13598v1" title="View HTML" id="html-2411.13598" aria-labelledby="html-2411.13598" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13598" title="Other formats" id="oth-2411.13598" aria-labelledby="oth-2411.13598">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Preserving Expert-Level Privacy in Offline Reinforcement Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sharma,+N">Navodita Sharma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vinod,+V">Vishnu Vinod</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Thakurta,+A">Abhradeep Thakurta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Agarwal,+A">Alekh Agarwal</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Balle,+B">Borja Balle</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dann,+C">Christoph Dann</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Raghuveer,+A">Aravindan Raghuveer</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> The offline reinforcement learning (RL) problem aims to learn an optimal policy from historical data collected by one or more behavioural policies (experts) by interacting with an environment. However, the individual experts may be privacy-sensitive in that the learnt policy may retain information about their precise choices. In some domains like personalized retrieval, advertising and healthcare, the expert choices are considered sensitive data. To provably protect the privacy of such experts, we propose a novel consensus-based expert-level differentially private offline RL training approach compatible with any existing offline RL algorithm. We prove rigorous differential privacy guarantees, while maintaining strong empirical performance. Unlike existing work in differentially private RL, we supplement the theory with proof-of-concept experiments on classic RL environments featuring large continuous state spaces, demonstrating substantial improvements over a natural baseline across multiple tasks. 
</p> </div> </dd> <dt> <a name='item23'>[23]</a> <a href ="/abs/2411.13604" title="Abstract" id="2411.13604"> arXiv:2411.13604 </a> [<a href="/pdf/2411.13604" title="Download PDF" id="pdf-2411.13604" aria-labelledby="pdf-2411.13604">pdf</a>, <a href="https://arxiv.org/html/2411.13604v1" title="View HTML" id="html-2411.13604" aria-labelledby="html-2411.13604" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13604" title="Other formats" id="oth-2411.13604" aria-labelledby="oth-2411.13604">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> RadPhi-3: Small Language Models for Radiology </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ranjit,+M">Mercy Ranjit</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shaury">Shaury Srivastav</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ganu,+T">Tanuja Ganu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Computation and Language (cs.CL); Machine Learning (cs.LG) </div> <p class='mathjax'> LLM based copilot assistants are useful in everyday tasks. There is a proliferation in the exploration of AI assistant use cases to support radiology workflows in a reliable manner. In this work, we present RadPhi-3, a Small Language Model instruction tuned from Phi-3-mini-4k-instruct with 3.8B parameters to assist with various tasks in radiology workflows. While impression summary generation has been the primary task which has been explored in prior works w.r.t radiology reports of Chest X-rays, we also explore other useful tasks like change summary generation comparing the current radiology report and its prior report, section extraction from radiology reports, tagging the reports with various pathologies and tubes, lines or devices present in them etc. 
In addition, instruction tuning RadPhi-3 involved learning from a credible knowledge source used by radiologists, <a href="http://Radiopaedia.org" rel="external noopener nofollow" class="link-external link-http">this http URL</a>. RadPhi-3 can be used both to give reliable answers for radiology related queries as well as perform useful tasks related to radiology reports. RadPhi-3 achieves SOTA results on the RaLEs radiology report generation benchmark. </p> </div> </dd> <dt> <a name='item24'>[24]</a> <a href ="/abs/2411.13607" title="Abstract" id="2411.13607"> arXiv:2411.13607 </a> [<a href="/pdf/2411.13607" title="Download PDF" id="pdf-2411.13607" aria-labelledby="pdf-2411.13607">pdf</a>, <a href="https://arxiv.org/html/2411.13607v1" title="View HTML" id="html-2411.13607" aria-labelledby="html-2411.13607" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13607" title="Other formats" id="oth-2411.13607" aria-labelledby="oth-2411.13607">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> VioPose: Violin Performance 4D Pose Estimation by Hierarchical Audiovisual Inference </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yoo,+S+J">Seong Jong Yoo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shrestha,+S">Snehesh Shrestha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Muresanu,+I">Irina Muresanu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ferm%C3%BCller,+C">Cornelia Fermüller</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by WACV 2025 in Round 1. First two authors contributed equally </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Musicians delicately control their bodies to generate music. 
Sometimes, their motions are too subtle to be captured by the human eye. To analyze how they move to produce the music, we need to estimate precise 4D human pose (3D pose over time). However, current state-of-the-art (SoTA) visual pose estimation algorithms struggle to produce accurate monocular 4D poses because of occlusions, partial views, and human-object interactions. They are limited by the viewing angle, pixel density, and sampling rate of the cameras and fail to estimate fast and subtle movements, such as in the musical effect of vibrato. We leverage the direct causal relationship between the music produced and the human motions creating them to address these challenges. We propose VioPose: a novel multimodal network that hierarchically estimates dynamics. High-level features are cascaded to low-level features and integrated into Bayesian updates. Our architecture is shown to produce accurate pose sequences, facilitating precise motion analysis, and outperforms SoTA. As part of this work, we collected the largest and the most diverse calibrated violin-playing dataset, including video, sound, and 3D motion capture poses. Project page is available at <a href="https://sj-yoo.info/viopose/" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item25'>[25]</a> <a href ="/abs/2411.13609" title="Abstract" id="2411.13609"> arXiv:2411.13609 </a> [<a href="/pdf/2411.13609" title="Download PDF" id="pdf-2411.13609" aria-labelledby="pdf-2411.13609">pdf</a>, <a href="https://arxiv.org/html/2411.13609v1" title="View HTML" id="html-2411.13609" aria-labelledby="html-2411.13609" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13609" title="Other formats" id="oth-2411.13609" aria-labelledby="oth-2411.13609">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> What You See Is What Matters: A Novel Visual and Physics-Based Metric for Evaluating Video Generation Quality </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zihan Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+S">Songlin Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hao,+L">Lingyan Hao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+B">Bowen Song</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+X">Xinyu Hu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> As video generation models advance rapidly, assessing the quality of generated videos has become increasingly critical. Existing metrics, such as Fréchet Video Distance (FVD), Inception Score (IS), and ClipSim, measure quality primarily in latent space rather than from a human visual perspective, often overlooking key aspects like appearance and motion consistency to physical laws. In this paper, we propose a novel metric, VAMP (Visual Appearance and Motion Plausibility), that evaluates both the visual appearance and physical plausibility of generated videos. 
VAMP is composed of two main components: an appearance score, which assesses color, shape, and texture consistency across frames, and a motion score, which evaluates the realism of object movements. We validate VAMP through two experiments: corrupted video evaluation and generated video evaluation. In the corrupted video evaluation, we introduce various types of corruptions into real videos and measure the correlation between corruption severity and VAMP scores. In the generated video evaluation, we use state-of-the-art models to generate videos from carefully designed prompts and compare VAMP's performance to human evaluators' rankings. Our results demonstrate that VAMP effectively captures both visual fidelity and temporal consistency, offering a more comprehensive evaluation of video quality than traditional methods. </p> </div> </dd> <dt> <a name='item26'>[26]</a> <a href ="/abs/2411.13610" title="Abstract" id="2411.13610"> arXiv:2411.13610 </a> [<a href="/pdf/2411.13610" title="Download PDF" id="pdf-2411.13610" aria-labelledby="pdf-2411.13610">pdf</a>, <a href="https://arxiv.org/html/2411.13610v1" title="View HTML" id="html-2411.13610" aria-labelledby="html-2411.13610" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13610" title="Other formats" id="oth-2411.13610" aria-labelledby="oth-2411.13610">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Video2BEV: Transforming Drone Videos to BEVs for Video-based Geo-localization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ju,+H">Hao Ju</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+Z">Zhedong Zheng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Existing approaches to drone visual geo-localization predominantly 
adopt the image-based setting, where a single drone-view snapshot is matched with images from other platforms. Such task formulation, however, underutilizes the inherent video output of the drone and is sensitive to occlusions and environmental constraints. To address these limitations, we formulate a new video-based drone geo-localization task and propose the Video2BEV paradigm. This paradigm transforms the video into a Bird's Eye View (BEV), simplifying the subsequent matching process. In particular, we employ Gaussian Splatting to reconstruct a 3D scene and obtain the BEV projection. Different from the existing transform methods, e.g., polar transform, our BEVs preserve more fine-grained details without significant distortion. To further improve model scalability toward diverse BEVs and satellite figures, our Video2BEV paradigm also incorporates a diffusion-based module for generating hard negative samples, which facilitates discriminative feature learning. To validate our approach, we introduce UniV, a new video-based geo-localization dataset that extends the image-based University-1652 dataset. UniV features flight paths at $30^\circ$ and $45^\circ$ elevation angles with increased frame rates of up to 10 frames per second (FPS). Extensive experiments on the UniV dataset show that our Video2BEV paradigm achieves competitive recall rates and outperforms conventional video-based methods. Compared to other methods, our proposed approach exhibits robustness at lower elevations with more occlusions. 
</p> </div> </dd> <dt> <a name='item27'>[27]</a> <a href ="/abs/2411.13611" title="Abstract" id="2411.13611"> arXiv:2411.13611 </a> [<a href="/pdf/2411.13611" title="Download PDF" id="pdf-2411.13611" aria-labelledby="pdf-2411.13611">pdf</a>, <a href="https://arxiv.org/html/2411.13611v1" title="View HTML" id="html-2411.13611" aria-labelledby="html-2411.13611" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13611" title="Other formats" id="oth-2411.13611" aria-labelledby="oth-2411.13611">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DSTC: Direct Preference Learning with Only Self-Generated Tests and Code to Improve Code LMs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zhihan Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+S">Shenao Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yongfei Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+B">Boyi Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Y">Yingxiang Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zhaoran Wang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Direct preference learning offers a promising and computation-efficient beyond supervised fine-tuning (SFT) for improving code generation in coding large language models (LMs). However, the scarcity of reliable preference data is a bottleneck for the performance of direct preference learning to improve the coding accuracy of code LMs. 
In this paper, we introduce Direct Preference Learning with Only Self-Generated Tests and Code (DSTC), a framework that leverages only self-generated code snippets and tests to construct reliable preference pairs such that direct preference learning can improve LM coding accuracy without external annotations. DSTC combines a minimax selection process and test-code concatenation to improve preference pair quality, reducing the influence of incorrect self-generated tests and enhancing model performance without the need for costly reward models. When applied with direct preference learning methods such as Direct Preference Optimization (DPO) and Kahneman-Tversky Optimization (KTO), DSTC yields stable improvements in coding accuracy (pass@1 score) across diverse coding benchmarks, including HumanEval, MBPP, and BigCodeBench, demonstrating both its effectiveness and scalability for models of various sizes. This approach autonomously enhances code generation accuracy across LLMs of varying sizes, reducing reliance on expensive annotated coding datasets. 
</p> </div> </dd> <dt> <a name='item28'>[28]</a> <a href ="/abs/2411.13612" title="Abstract" id="2411.13612"> arXiv:2411.13612 </a> [<a href="/pdf/2411.13612" title="Download PDF" id="pdf-2411.13612" aria-labelledby="pdf-2411.13612">pdf</a>, <a href="https://arxiv.org/html/2411.13612v1" title="View HTML" id="html-2411.13612" aria-labelledby="html-2411.13612" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13612" title="Other formats" id="oth-2411.13612" aria-labelledby="oth-2411.13612">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Efficient Streaming Voice Steganalysis in Challenging Detection Scenarios </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+P">Pengcheng Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fang,+Z">Zhengyang Fang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Z">Zhongliang Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+Z">Zhili Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+L">Linna Zhou</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Machine Learning (cs.LG); Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> In recent years, there has been an increasing number of information hiding techniques based on network streaming media, focusing on how to covertly and efficiently embed secret information into real-time transmitted network media signals to achieve concealed communication. The misuse of these techniques can lead to significant security risks, such as the spread of malicious code, commands, and viruses. Current steganalysis methods for network voice streams face two major challenges: efficient detection under low embedding rates and short duration conditions. 
These challenges arise because, with low embedding rates (e.g., as low as 10%) and short transmission durations (e.g., only 0.1 second), detection models struggle to acquire sufficiently rich sample features, making effective steganalysis difficult. To address these challenges, this paper introduces a Dual-View VoIP Steganalysis Framework (DVSF). The framework first randomly obfuscates parts of the native steganographic descriptors in VoIP stream segments, making the steganographic features of hard-to-detect samples more pronounced and easier to learn. It then captures fine-grained local features related to steganography, building on the global features of VoIP. Specially constructed VoIP segment triplets further adjust the feature distances within the model. Ultimately, this method effectively addresses the detection difficulty in VoIP. Extensive experiments demonstrate that our method significantly improves the accuracy of streaming voice steganalysis in these challenging detection scenarios, surpassing existing state-of-the-art methods and offering superior near-real-time performance. 
</p> </div> </dd> <dt> <a name='item29'>[29]</a> <a href ="/abs/2411.13613" title="Abstract" id="2411.13613"> arXiv:2411.13613 </a> [<a href="/pdf/2411.13613" title="Download PDF" id="pdf-2411.13613" aria-labelledby="pdf-2411.13613">pdf</a>, <a href="https://arxiv.org/html/2411.13613v1" title="View HTML" id="html-2411.13613" aria-labelledby="html-2411.13613" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13613" title="Other formats" id="oth-2411.13613" aria-labelledby="oth-2411.13613">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SuPLE: Robot Learning with Lyapunov Rewards </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+P">Phu Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Polani,+D">Daniel Polani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tiomkin,+S">Stas Tiomkin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The reward function is an essential component in robot learning. Reward directly affects the sample and computational complexity of learning, and the quality of a solution. The design of informative rewards requires domain knowledge, which is not always available. We use the properties of the dynamics to produce system-appropriate reward without adding external assumptions. Specifically, we explore an approach to utilize the Lyapunov exponents of the system dynamics to generate a system-immanent reward. We demonstrate that the `Sum of the Positive Lyapunov Exponents' (SuPLE) is a strong candidate for the design of such a reward. 
We develop a computational framework for the derivation of this reward, and demonstrate its effectiveness on classical benchmarks for sample-based stabilization of various dynamical systems. It eliminates the need to start the training trajectories at arbitrary states, also known as auxiliary exploration. While the latter is a common practice in simulated robot learning, it is impractical to use in real robotic systems, since they typically start from natural rest states such as a pendulum at the bottom, a robot on the ground, etc., and cannot be easily initialized at arbitrary states. Comparing the performance of SuPLE to commonly-used reward functions, we observe that the latter fail to find a solution without auxiliary exploration, even for the task of swinging up the double pendulum and keeping it stable at the upright position, a prototypical scenario for multi-linked robots. SuPLE-induced rewards for robot learning offer a novel route for effective robot learning in typical as opposed to highly specialized or fine-tuned scenarios. Our code is publicly available for reproducibility and further research. 
</p> </div> </dd> <dt> <a name='item30'>[30]</a> <a href ="/abs/2411.13614" title="Abstract" id="2411.13614"> arXiv:2411.13614 </a> [<a href="/pdf/2411.13614" title="Download PDF" id="pdf-2411.13614" aria-labelledby="pdf-2411.13614">pdf</a>, <a href="https://arxiv.org/html/2411.13614v1" title="View HTML" id="html-2411.13614" aria-labelledby="html-2411.13614" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13614" title="Other formats" id="oth-2411.13614" aria-labelledby="oth-2411.13614">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Verification and Validation of Autonomous Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shetiya,+S+S">Sneha Sudhir Shetiya</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vyas,+V">Vikas Vyas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Renukuntla,+S">Shreyas Renukuntla</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> This paper describes how to proficiently prevent software defects in autonomous vehicles, discover and correct defects if they are encountered, and create a higher level of assurance in the software product development phase. It also describes how to ensure high assurance on software reliability. 
</p> </div> </dd> <dt> <a name='item31'>[31]</a> <a href ="/abs/2411.13616" title="Abstract" id="2411.13616"> arXiv:2411.13616 </a> [<a href="/pdf/2411.13616" title="Download PDF" id="pdf-2411.13616" aria-labelledby="pdf-2411.13616">pdf</a>, <a href="/format/2411.13616" title="Other formats" id="oth-2411.13616" aria-labelledby="oth-2411.13616">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Identifying Semantic Similarity for UX Items from Established Questionnaires Using ChatGPT-4 </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Graser,+S">Stefan Graser</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schrepp,+M">Martin Schrepp</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=B%C3%B6hm,+S">Stephan Böhm</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages, 3 figures, International Journal on Advances in Systems and Measurements, vol 17 no 1 & 2, year 2024, <a href="http://www.iariajournals.org/systems_and_measurements/" rel="external noopener nofollow" class="link-external link-http">this http URL</a>. arXiv admin note: substantial text overlap with <a href="https://arxiv.org/abs/2411.13118" data-arxiv-id="2411.13118" class="link-https">arXiv:2411.13118</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span> </div> <p class='mathjax'> Questionnaires are a widely used tool for measuring the user experience (UX) of products. There exists a huge number of such questionnaires that contain different items (questions) and scales representing distinct aspects of UX, such as efficiency, learnability, fun of use, or aesthetics. These items and scales are not independent; they often have semantic overlap. 
However, due to the large number of available items and scales in the UX field, analyzing and understanding these semantic dependencies can be challenging. Large language models (LLM) are powerful tools to categorize texts, including UX items. We explore how ChatGPT-4 can be utilized to analyze the semantic structure of sets of UX items. This paper investigates three different use cases. In the first investigation, ChatGPT-4 is used to generate a semantic classification of UX items extracted from 40 UX questionnaires. The results demonstrate that ChatGPT-4 can effectively classify items into meaningful topics. The second investigation demonstrates ChatGPT-4's ability to filter items related to a predefined UX concept from a pool of UX items. In the third investigation, a second set of more abstract items is used to describe another classification task. The outcome of this investigation helps to determine semantic similarities between common UX concepts and enhances our understanding of the concept of UX. 
Overall, it is considered useful to apply GenAI in UX research. </p> </div> </dd> <dt> <a name='item32'>[32]</a> <a href ="/abs/2411.13617" title="Abstract" id="2411.13617"> arXiv:2411.13617 </a> [<a href="/pdf/2411.13617" title="Download PDF" id="pdf-2411.13617" aria-labelledby="pdf-2411.13617">pdf</a>, <a href="https://arxiv.org/html/2411.13617v1" title="View HTML" id="html-2411.13617" aria-labelledby="html-2411.13617" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13617" title="Other formats" id="oth-2411.13617" aria-labelledby="oth-2411.13617">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Maximum-norm a posteriori error bounds for parabolic equations discretised by the extrapolated Euler method in time and FEM in space </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Lin%C3%9F,+T">Torsten Linß</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Radojev,+G">Goran Radojev</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> arXiv admin note: substantial text overlap with <a href="https://arxiv.org/abs/2304.01637" data-arxiv-id="2304.01637" class="link-https">arXiv:2304.01637</a>, <a href="https://arxiv.org/abs/2208.08153" data-arxiv-id="2208.08153" class="link-https">arXiv:2208.08153</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span> </div> <p class='mathjax'> A class of linear parabolic equations is considered. We derive a framework for the a posteriori error analysis of time discretisations by Richardson extrapolation of arbitrary order combined with finite element discretisations in space. We use the idea of elliptic reconstructions and certain bounds for the Green's function of the parabolic operator. 
The crucial point in the analysis is the design of suitable polynomial reconstructions in time from approximations that are given only in the mesh points. </p> </div> </dd> <dt> <a name='item33'>[33]</a> <a href ="/abs/2411.13619" title="Abstract" id="2411.13619"> arXiv:2411.13619 </a> [<a href="/pdf/2411.13619" title="Download PDF" id="pdf-2411.13619" aria-labelledby="pdf-2411.13619">pdf</a>, <a href="https://arxiv.org/html/2411.13619v1" title="View HTML" id="html-2411.13619" aria-labelledby="html-2411.13619" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13619" title="Other formats" id="oth-2411.13619" aria-labelledby="oth-2411.13619">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Non-Linear Outlier Synthesis for Out-of-Distribution Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Doorenbos,+L">Lars Doorenbos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sznitman,+R">Raphael Sznitman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=M%C3%A1rquez-Neila,+P">Pablo Márquez-Neila</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> The reliability of supervised classifiers is severely hampered by their limitations in dealing with unexpected inputs, leading to great interest in out-of-distribution (OOD) detection. Recently, OOD detectors trained on synthetic outliers, especially those generated by large diffusion models, have shown promising results in defining robust OOD decision boundaries. 
Building on this progress, we present NCIS, which enhances the quality of synthetic outliers by operating directly in the diffusion's model embedding space rather than combining disjoint models as in previous work and by modeling class-conditional manifolds with a conditional volume-preserving network for more expressive characterization of the training distribution. We demonstrate that these improvements yield new state-of-the-art OOD detection results on standard ImageNet100 and CIFAR100 benchmarks and provide insights into the importance of data pre-processing and other key design choices. We make our code available at <a href="https://github.com/LarsDoorenbos/NCIS" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item34'>[34]</a> <a href ="/abs/2411.13620" title="Abstract" id="2411.13620"> arXiv:2411.13620 </a> [<a href="/pdf/2411.13620" title="Download PDF" id="pdf-2411.13620" aria-labelledby="pdf-2411.13620">pdf</a>, <a href="https://arxiv.org/html/2411.13620v1" title="View HTML" id="html-2411.13620" aria-labelledby="html-2411.13620" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13620" title="Other formats" id="oth-2411.13620" aria-labelledby="oth-2411.13620">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Robust SG-NeRF: Robust Scene Graph Aided Neural Surface Reconstruction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gu,+Y">Yi Gu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ye,+D">Dongjun Ye</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zhaorui Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+J">Jiaxu Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+J">Jiahang Cao</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+R">Renjing Xu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> <a href="https://rsg-nerf.github.io/RSG-NeRF/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Neural surface reconstruction relies heavily on accurate camera poses as input. Despite utilizing advanced pose estimators like COLMAP or ARKit, camera poses can still be noisy. Existing pose-NeRF joint optimization methods handle poses with small noise (inliers) effectively but struggle with large noise (outliers), such as mirrored poses. In this work, we focus on mitigating the impact of outlier poses. Our method integrates an inlier-outlier confidence estimation scheme, leveraging scene graph information gathered during the data preparation phase. Unlike previous works directly using rendering metrics as the reference, we employ a detached color network that omits the viewing direction as input to minimize the impact caused by shape-radiance ambiguities. This enhanced confidence updating strategy effectively differentiates between inlier and outlier poses, allowing us to sample more rays from inlier poses to construct more reliable radiance fields. Additionally, we introduce a re-projection loss based on the current Signed Distance Function (SDF) and pose estimations, strengthening the constraints between matching image pairs. For outlier poses, we adopt a Monte Carlo re-localization method to find better solutions. We also devise a scene graph updating strategy to provide more accurate information throughout the training process. We validate our approach on the SG-NeRF and DTU datasets. 
Experimental results on various datasets demonstrate that our methods can consistently improve the reconstruction qualities and pose accuracies. </p> </div> </dd> <dt> <a name='item35'>[35]</a> <a href ="/abs/2411.13623" title="Abstract" id="2411.13623"> arXiv:2411.13623 </a> [<a href="/pdf/2411.13623" title="Download PDF" id="pdf-2411.13623" aria-labelledby="pdf-2411.13623">pdf</a>, <a href="https://arxiv.org/html/2411.13623v1" title="View HTML" id="html-2411.13623" aria-labelledby="html-2411.13623" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13623" title="Other formats" id="oth-2411.13623" aria-labelledby="oth-2411.13623">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unsupervised Foundation Model-Agnostic Slide-Level Representation Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lenz,+T">Tim Lenz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Neidlinger,+P">Peter Neidlinger</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ligero,+M">Marta Ligero</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=W%C3%B6lflein,+G">Georg Wölflein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=van+Treeck,+M">Marko van Treeck</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kather,+J+N">Jakob Nikolas Kather</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Representation learning of pathology whole-slide images (WSIs) has primarily relied on weak supervision with Multiple Instance Learning (MIL). This approach leads to slide representations highly tailored to a specific clinical task. 
Self-supervised learning (SSL) has been successfully applied to train histopathology foundation models (FMs) for patch embedding generation. However, generating patient or slide level embeddings remains challenging. Existing approaches for slide representation learning extend the principles of SSL from patch level learning to entire slides by aligning different augmentations of the slide or by utilizing multimodal data. By integrating tile embeddings from multiple FMs, we propose a new single modality SSL method in feature space that generates useful slide representations. Our contrastive pretraining strategy, called COBRA, employs multiple FMs and an architecture based on Mamba-2. COBRA exceeds performance of state-of-the-art slide encoders on four different public CPTAC cohorts on average by at least +3.8% AUC, despite only being pretrained on 3048 WSIs from TCGA. Additionally, COBRA is readily compatible at inference time with previously unseen feature extractors. </p> </div> </dd> <dt> <a name='item36'>[36]</a> <a href ="/abs/2411.13626" title="Abstract" id="2411.13626"> arXiv:2411.13626 </a> [<a href="/pdf/2411.13626" title="Download PDF" id="pdf-2411.13626" aria-labelledby="pdf-2411.13626">pdf</a>, <a href="https://arxiv.org/html/2411.13626v1" title="View HTML" id="html-2411.13626" aria-labelledby="html-2411.13626" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13626" title="Other formats" id="oth-2411.13626" aria-labelledby="oth-2411.13626">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Principles of Visual Tokens for Efficient Video Understanding </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hao,+X">Xinyue Hao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+G">Gen Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gowda,+S+N">Shreyank N Gowda</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Fisher,+R+B">Robert B Fisher</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+J">Jonathan Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Arnab,+A">Anurag Arnab</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sevilla-Lara,+L">Laura Sevilla-Lara</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Video understanding has made huge strides in recent years, relying largely on the power of the transformer architecture. As this architecture is notoriously expensive and video is highly redundant, research into improving efficiency has become particularly relevant. This has led to many creative solutions, including token merging and token selection. While most methods succeed in reducing the cost of the model and maintaining accuracy, an interesting pattern arises: most methods do not outperform the random sampling baseline. In this paper we take a closer look at this phenomenon and make several observations. First, we develop an oracle for the value of tokens which exposes a clear Pareto distribution where most tokens have remarkably low value, and just a few carry most of the perceptual information. Second, we analyze why this oracle is extremely hard to learn, as it does not consistently coincide with visual cues. Third, we observe that easy videos need fewer tokens to maintain accuracy. We build on these and further insights to propose a lightweight video model we call LITE that can select a small number of tokens effectively, outperforming state-of-the-art and existing baselines across datasets (Kinetics400 and Something-Something-V2) in the challenging trade-off of computation (GFLOPs) vs accuracy. 
</p> </div> </dd> <dt> <a name='item37'>[37]</a> <a href ="/abs/2411.13627" title="Abstract" id="2411.13627"> arXiv:2411.13627 </a> [<a href="/pdf/2411.13627" title="Download PDF" id="pdf-2411.13627" aria-labelledby="pdf-2411.13627">pdf</a>, <a href="https://arxiv.org/html/2411.13627v1" title="View HTML" id="html-2411.13627" aria-labelledby="html-2411.13627" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13627" title="Other formats" id="oth-2411.13627" aria-labelledby="oth-2411.13627">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CryptoFormalEval: Integrating LLMs and Formal Verification for Automated Cryptographic Protocol Vulnerability Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Curaba,+C">Cristian Curaba</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=D'Ambrosi,+D">Denis D'Ambrosi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Minisini,+A">Alessandro Minisini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Antol%C3%ADn,+N+P">Natalia Pérez-Campanero Antolín</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Artificial Intelligence (cs.AI); Symbolic Computation (cs.SC) </div> <p class='mathjax'> Cryptographic protocols play a fundamental role in securing modern digital infrastructure, but they are often deployed without prior formal verification. This could lead to the adoption of distributed systems vulnerable to attack vectors. Formal verification methods, on the other hand, require complex and time-consuming techniques that lack automatization. 
In this paper, we introduce a benchmark to assess the ability of Large Language Models (LLMs) to autonomously identify vulnerabilities in new cryptographic protocols through interaction with Tamarin: a theorem prover for protocol verification. We created a manually validated dataset of novel, flawed, communication protocols and designed a method to automatically verify the vulnerabilities found by the AI agents. Our results about the performances of the current frontier models on the benchmark provides insights about the possibility of cybersecurity applications by integrating LLMs with symbolic reasoning systems. </p> </div> </dd> <dt> <a name='item38'>[38]</a> <a href ="/abs/2411.13628" title="Abstract" id="2411.13628"> arXiv:2411.13628 </a> [<a href="/pdf/2411.13628" title="Download PDF" id="pdf-2411.13628" aria-labelledby="pdf-2411.13628">pdf</a>, <a href="https://arxiv.org/html/2411.13628v1" title="View HTML" id="html-2411.13628" aria-labelledby="html-2411.13628" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13628" title="Other formats" id="oth-2411.13628" aria-labelledby="oth-2411.13628">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MambaDETR: Query-based Temporal Modeling using State Space Model for Multi-View 3D Object Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ning,+T">Tong Ning</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+K">Ke Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+X">Xirui Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xue,+J">Jian Xue</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Utilizing temporal information to improve the performance of 3D detection has made great 
progress recently in the field of autonomous driving. Traditional transformer-based temporal fusion methods suffer from quadratic computational cost and information decay as the length of the frame sequence increases. In this paper, we propose a novel method called MambaDETR, whose main idea is to implement temporal fusion in the efficient state space. Moreover, we design a Motion Elimination module to remove the relatively static objects for temporal fusion. On the standard nuScenes benchmark, our proposed MambaDETR achieves remarkable result in the 3D object detection task, exhibiting state-of-the-art performance among existing temporal fusion methods. </p> </div> </dd> <dt> <a name='item39'>[39]</a> <a href ="/abs/2411.13631" title="Abstract" id="2411.13631"> arXiv:2411.13631 </a> [<a href="/pdf/2411.13631" title="Download PDF" id="pdf-2411.13631" aria-labelledby="pdf-2411.13631">pdf</a>, <a href="/format/2411.13631" title="Other formats" id="oth-2411.13631" aria-labelledby="oth-2411.13631">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sparse Input View Synthesis: 3D Representations and Reliable Priors </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Somraj,+N">Nagabhushan Somraj</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> PhD Thesis of Nagabhushan S N, Dept of ECE, Indian Institute of Science (IISc); Advisor: Dr. Rajiv Soundararajan; Thesis Reviewers: Dr. Kaushik Mitra (IIT Madras), Dr. 
Aniket Bera (Purdue University); Submitted: May 2024; Accepted and Defended: Sep 2024; Abstract condensed, please check the PDF for full abstract </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Novel view synthesis refers to the problem of synthesizing novel viewpoints of a scene given the images from a few viewpoints. This is a fundamental problem in computer vision and graphics, and enables a vast variety of applications such as meta-verse, free-view watching of events, video gaming, video stabilization and video compression. Recent 3D representations such as radiance fields and multi-plane images significantly improve the quality of images rendered from novel viewpoints. However, these models require a dense sampling of input views for high quality renders. Their performance goes down significantly when only a few input views are available. In this thesis, we focus on the sparse input novel view synthesis problem for both static and dynamic scenes. 
</p> </div> </dd> <dt> <a name='item40'>[40]</a> <a href ="/abs/2411.13632" title="Abstract" id="2411.13632"> arXiv:2411.13632 </a> [<a href="/pdf/2411.13632" title="Download PDF" id="pdf-2411.13632" aria-labelledby="pdf-2411.13632">pdf</a>, <a href="/format/2411.13632" title="Other formats" id="oth-2411.13632" aria-labelledby="oth-2411.13632">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ID-Patch: Robust ID Association for Group Photo Personalization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yimeng Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhi,+T">Tiancheng Zhi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+J">Jing Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sang,+S">Shen Sang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+L">Liming Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yan,+Q">Qing Yan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+S">Sijia Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+L">Linjie Luo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Project Page is: <a href="https://byteaigc.github.io/ID-Patch/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> The ability to synthesize personalized group photos and specify the positions of each identity offers immense creative potential. While such imagery can be visually appealing, it presents significant challenges for existing technologies. 
A persistent issue is identity (ID) leakage, where injected facial features interfere with one another, resulting in low face resemblance, incorrect positioning, and visual artifacts. Existing methods suffer from limitations such as the reliance on segmentation models, increased runtime, or a high probability of ID leakage. To address these challenges, we propose ID-Patch, a novel method that provides robust association between identities and 2D positions. Our approach generates an ID patch and ID embeddings from the same facial features: the ID patch is positioned on the conditional image for precise spatial control, while the ID embeddings integrate with text embeddings to ensure high resemblance. Experimental results demonstrate that ID-Patch surpasses baseline methods across metrics, such as face ID resemblance, ID-position association accuracy, and generation efficiency. Project Page is: <a href="https://byteaigc.github.io/ID-Patch/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item41'>[41]</a> <a href ="/abs/2411.13653" title="Abstract" id="2411.13653"> arXiv:2411.13653 </a> [<a href="/pdf/2411.13653" title="Download PDF" id="pdf-2411.13653" aria-labelledby="pdf-2411.13653">pdf</a>, <a href="/format/2411.13653" title="Other formats" id="oth-2411.13653" aria-labelledby="oth-2411.13653">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> No Free Delivery Service: Epistemic limits of passive data collection in complex social systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nickel,+M">Maximilian Nickel</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> To appear in NeurIPS'24 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Machine Learning (stat.ML) 
</div> <p class='mathjax'> Rapid model validation via the train-test paradigm has been a key driver for the breathtaking progress in machine learning and AI. However, modern AI systems often depend on a combination of tasks and data collection practices that violate all assumptions ensuring test validity. Yet, without rigorous model validation we cannot ensure the intended outcomes of deployed AI systems, including positive social impact, nor continue to advance AI research in a scientifically sound way. In this paper, I will show that for widely considered inference settings in complex social systems the train-test paradigm does not only lack a justification but is indeed invalid for any risk estimator, including counterfactual and causal estimators, with high probability. These formal impossibility results highlight a fundamental epistemic issue, i.e., that for key tasks in modern AI we cannot know whether models are valid under current data collection practices. Importantly, this includes variants of both recommender systems and reasoning via large language models, and neither naïve scaling nor limited benchmarks are suited to address this issue. I am illustrating these results via the widely used MovieLens benchmark and conclude by discussing the implications of these results for AI in social systems, including possible remedies such as participatory data curation and open science. 
</p> </div> </dd> <dt> <a name='item42'>[42]</a> <a href ="/abs/2411.13668" title="Abstract" id="2411.13668"> arXiv:2411.13668 </a> [<a href="/pdf/2411.13668" title="Download PDF" id="pdf-2411.13668" aria-labelledby="pdf-2411.13668">pdf</a>, <a href="https://arxiv.org/html/2411.13668v1" title="View HTML" id="html-2411.13668" aria-labelledby="html-2411.13668" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13668" title="Other formats" id="oth-2411.13668" aria-labelledby="oth-2411.13668">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hermes: A General-Purpose Proxy-Enabled Networking Architecture </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Farkiani,+B">Behrooz Farkiani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+F">Fan Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+K">Ke Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=DeHart,+J">John DeHart</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Parwatikar,+J">Jyoti Parwatikar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Crowley,+P">Patrick Crowley</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Networking and Internet Architecture (cs.NI)</span>; Performance (cs.PF) </div> <p class='mathjax'> We introduce Hermes, a general-purpose networking architecture built on an overlay of reconfigurable proxies. Hermes delegates networking responsibilities from applications and services to the overlay proxies. It employs a range of proxying and tunneling techniques, utilizes HTTP as its core component, and incorporates assisting components to facilitate service delivery, enhance communication, and improve end-users' experience. 
To substantiate these benefits, we prototyped Hermes and demonstrated its ability to efficiently address service and communication challenges. We showed that Hermes enables end-to-end solutions for compatibility with legacy applications and protocols and reliable delivery in highly disadvantaged networking conditions. Furthermore, Hermes demonstrated its ability to provide end-to-end, business-logic-driven handling of general IP traffic and to serve as a communication pipeline for Named Data Networking, facilitating the development and adoption of future networking architectures. </p> </div> </dd> <dt> <a name='item43'>[43]</a> <a href ="/abs/2411.13672" title="Abstract" id="2411.13672"> arXiv:2411.13672 </a> [<a href="/pdf/2411.13672" title="Download PDF" id="pdf-2411.13672" aria-labelledby="pdf-2411.13672">pdf</a>, <a href="https://arxiv.org/html/2411.13672v1" title="View HTML" id="html-2411.13672" aria-labelledby="html-2411.13672" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13672" title="Other formats" id="oth-2411.13672" aria-labelledby="oth-2411.13672">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Computable Approximations of Semicomputable Graphs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=%C4%8Ca%C4%8Di%C4%87,+V">Vedran Čačić</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=%C4%8Celar,+M">Matea Čelar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Horvat,+M">Marko Horvat</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Iljazovi%C4%87,+Z">Zvonko Iljazović</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Logic in Computer Science (cs.LO)</span> </div> <p class='mathjax'> In this work, we study the 
computability of topological graphs, which are obtained by gluing arcs and rays together at their endpoints. We prove that every semicomputable graph in a computable metric space can be approximated, with arbitrary precision, by its computable subgraph with computable endpoints. </p> </div> </dd> <dt> <a name='item44'>[44]</a> <a href ="/abs/2411.13674" title="Abstract" id="2411.13674"> arXiv:2411.13674 </a> [<a href="/pdf/2411.13674" title="Download PDF" id="pdf-2411.13674" aria-labelledby="pdf-2411.13674">pdf</a>, <a href="https://arxiv.org/html/2411.13674v1" title="View HTML" id="html-2411.13674" aria-labelledby="html-2411.13674" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13674" title="Other formats" id="oth-2411.13674" aria-labelledby="oth-2411.13674">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FabuLight-ASD: Unveiling Speech Activity via Body Language </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Carneiro,+H">Hugo Carneiro</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wermter,+S">Stefan Wermter</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 23 pages, 8 figures, 3 tables, accepted for publication in Neural Computing and Applications </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG); Neural and Evolutionary Computing (cs.NE); Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> Active speaker detection (ASD) in multimodal environments is crucial for various applications, from video conferencing to human-robot interaction. This paper introduces FabuLight-ASD, an advanced ASD model that integrates facial, audio, and body pose information to enhance detection accuracy and robustness. 
Our model builds upon the existing Light-ASD framework by incorporating human pose data, represented through skeleton graphs, which minimises computational overhead. Using the Wilder Active Speaker Detection (WASD) dataset, renowned for reliable face and body bounding box annotations, we demonstrate FabuLight-ASD's effectiveness in real-world scenarios. Achieving an overall mean average precision (mAP) of 94.3%, FabuLight-ASD outperforms Light-ASD, which has an overall mAP of 93.7% across various challenging scenarios. The incorporation of body pose information shows a particularly advantageous impact, with notable improvements in mAP observed in scenarios with speech impairment, face occlusion, and human voice background noise. Furthermore, efficiency analysis indicates only a modest increase in parameter count (27.3%) and multiply-accumulate operations (up to 2.4%), underscoring the model's efficiency and feasibility. These findings validate the efficacy of FabuLight-ASD in enhancing ASD performance through the integration of body pose data. FabuLight-ASD's code and model weights are available at <a href="https://github.com/knowledgetechnologyuhh/FabuLight-ASD" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item45'>[45]</a> <a href ="/abs/2411.13676" title="Abstract" id="2411.13676"> arXiv:2411.13676 </a> [<a href="/pdf/2411.13676" title="Download PDF" id="pdf-2411.13676" aria-labelledby="pdf-2411.13676">pdf</a>, <a href="https://arxiv.org/html/2411.13676v1" title="View HTML" id="html-2411.13676" aria-labelledby="html-2411.13676" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13676" title="Other formats" id="oth-2411.13676" aria-labelledby="oth-2411.13676">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hymba: A Hybrid-head Architecture for Small Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Dong,+X">Xin Dong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+Y">Yonggan Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Diao,+S">Shizhe Diao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Byeon,+W">Wonmin Byeon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Z">Zijia Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mahabaleshwarkar,+A+S">Ameya Sunil Mahabaleshwarkar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+S">Shih-Yang Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Van+Keirsbilck,+M">Matthijs Van Keirsbilck</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+M">Min-Hung Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Suhara,+Y">Yoshi Suhara</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Y">Yingyan Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kautz,+J">Jan Kautz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Molchanov,+P">Pavlo Molchanov</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 20 
pages, models are available on huggingface </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> We propose Hymba, a family of small language models featuring a hybrid-head parallel architecture that integrates transformer attention mechanisms with state space models (SSMs) for enhanced efficiency. Attention heads provide high-resolution recall, while SSM heads enable efficient context summarization. Additionally, we introduce learnable meta tokens that are prepended to prompts, storing critical information and alleviating the "forced-to-attend" burden associated with attention mechanisms. This model is further optimized by incorporating cross-layer key-value (KV) sharing and partial sliding window attention, resulting in a compact cache size. During development, we conducted a controlled study comparing various architectures under identical settings and observed significant advantages of our proposed architecture. Notably, Hymba achieves state-of-the-art results for small LMs: Our Hymba-1.5B-Base model surpasses all sub-2B public models in performance and even outperforms Llama-3.2-3B with 1.32% higher average accuracy, an 11.67x cache size reduction, and 3.49x throughput. 
</p> </div> </dd> <dt> <a name='item46'>[46]</a> <a href ="/abs/2411.13677" title="Abstract" id="2411.13677"> arXiv:2411.13677 </a> [<a href="/pdf/2411.13677" title="Download PDF" id="pdf-2411.13677" aria-labelledby="pdf-2411.13677">pdf</a>, <a href="https://arxiv.org/html/2411.13677v1" title="View HTML" id="html-2411.13677" aria-labelledby="html-2411.13677" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13677" title="Other formats" id="oth-2411.13677" aria-labelledby="oth-2411.13677">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Bimanual Dexterity for Complex Tasks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shaw,+K">Kenneth Shaw</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yulong Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+J">Jiahui Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kumar,+M">Mohan Kumar Srirama</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+R">Ray Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiong,+H">Haoyu Xiong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mendonca,+R">Russell Mendonca</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pathak,+D">Deepak Pathak</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In CoRL 2024. 
Website at <a href="https://bidex-teleop.github.io/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> To train generalist robot policies, machine learning methods often require a substantial amount of expert human teleoperation data. An ideal robot for humans collecting data is one that closely mimics them: bimanual arms and dexterous hands. However, creating such a bimanual teleoperation system with over 50 DoF is a significant challenge. To address this, we introduce Bidex, an extremely dexterous, low-cost, low-latency and portable bimanual dexterous teleoperation system which relies on motion capture gloves and teacher arms. We compare Bidex to a Vision Pro teleoperation system and a SteamVR system and find Bidex to produce better quality data for more complex tasks at a faster rate. Additionally, we show Bidex operating a mobile bimanual robot for in the wild tasks. The robot hands (5k USD) and teleoperation system (7k USD) is readily reproducible and can be used on many robot arms including two xArms (16k USD). 
Website at <a href="https://bidex-teleop.github.io/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item47'>[47]</a> <a href ="/abs/2411.13681" title="Abstract" id="2411.13681"> arXiv:2411.13681 </a> [<a href="/pdf/2411.13681" title="Download PDF" id="pdf-2411.13681" aria-labelledby="pdf-2411.13681">pdf</a>, <a href="/format/2411.13681" title="Other formats" id="oth-2411.13681" aria-labelledby="oth-2411.13681">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Elephant in the Room: Dissecting and Reflecting on the Evolution of Online Social Network Research </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pajola,+L">Luca Pajola</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schr%C3%B6er,+S+L">Saskia Laura Schröer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tricomi,+P+P">Pier Paolo Tricomi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Conti,+M">Mauro Conti</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Apruzzese,+G">Giovanni Apruzzese</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at ICWSM 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Social and Information Networks (cs.SI)</span> </div> <p class='mathjax'> Billions of individuals engage with Online Social Networks (OSN) daily. The owners of OSN try to meet the demands of their end-users while complying with business necessities. Such necessities may, however, lead to the adoption of restrictive data access policies that hinder research activities from "external" scientists -- who may, in turn, resort to other means (e.g., rely on static datasets) for their studies. 
Given the abundance of literature on OSN, we -- as academics -- should take a step back and reflect on what we have done so far, after having written thousands of papers on OSN. This is the first paper that provides a holistic outlook to the entire body of research that focused on OSN -- since the seminal work by Acquisti and Gross (2006). First, we search through over 1 million peer-reviewed publications, and derive 13,842 papers that focus on OSN: we organize the metadata of these works in the Minerva-OSN dataset, the first of its kind -- which we publicly release. Next, by analyzing Minerva-OSN, we provide factual evidence elucidating trends and aspects that deserve to be brought to light, such as the predominant focus on Twitter or the difficulty in obtaining OSN data. Finally, as a constructive step to guide future research, we carry out an expert survey (n=50) with established scientists in this field, and coalesce suggestions to improve the status quo such as an increased involvement of OSN owners. Our findings should inspire a reflection to "rescue" research on OSN. Doing so would improve the overall OSN ecosystem, benefiting both their owners and end-users and, hence, our society. 
</p> </div> </dd> <dt> <a name='item48'>[48]</a> <a href ="/abs/2411.13682" title="Abstract" id="2411.13682"> arXiv:2411.13682 </a> [<a href="/pdf/2411.13682" title="Download PDF" id="pdf-2411.13682" aria-labelledby="pdf-2411.13682">pdf</a>, <a href="https://arxiv.org/html/2411.13682v1" title="View HTML" id="html-2411.13682" aria-labelledby="html-2411.13682" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13682" title="Other formats" id="oth-2411.13682" aria-labelledby="oth-2411.13682">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Differentially Private Learning Beyond the Classical Dimensionality Regime </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Dwork,+C">Cynthia Dwork</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tankala,+P">Pranay Tankala</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+L">Linjun Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Cryptography and Security (cs.CR); Data Structures and Algorithms (cs.DS) </div> <p class='mathjax'> We initiate the study of differentially private learning in the proportional dimensionality regime, in which the number of data samples $n$ and problem dimension $d$ approach infinity at rates proportional to one another, meaning that $d / n \to \delta$ as $n \to \infty$ for an arbitrary, given constant $\delta \in (0, \infty)$. This setting is significantly more challenging than that of all prior theoretical work in high-dimensional differentially private learning, which, despite the name, has assumed that $\delta = 0$ or is sufficiently small for problems of sample complexity $O(d)$, a regime typically considered "low-dimensional" or "classical" by modern standards in high-dimensional statistics. 
<br>We provide sharp theoretical estimates of the error of several well-studied differentially private algorithms for robust linear regression and logistic regression, including output perturbation, objective perturbation, and noisy stochastic gradient descent, in the proportional dimensionality regime. The $1 + o(1)$ factor precision of our error estimates enables a far more nuanced understanding of the price of privacy of these algorithms than that afforded by existing, coarser analyses, which are essentially vacuous in the regime we consider. <br>We incorporate several probabilistic tools that have not previously been used to analyze differentially private learning algorithms, such as a modern Gaussian comparison inequality and recent universality laws with origins in statistical physics. </p> </div> </dd> <dt> <a name='item49'>[49]</a> <a href ="/abs/2411.13683" title="Abstract" id="2411.13683"> arXiv:2411.13683 </a> [<a href="/pdf/2411.13683" title="Download PDF" id="pdf-2411.13683" aria-labelledby="pdf-2411.13683">pdf</a>, <a href="https://arxiv.org/html/2411.13683v1" title="View HTML" id="html-2411.13683" aria-labelledby="html-2411.13683" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13683" title="Other formats" id="oth-2411.13683" aria-labelledby="oth-2411.13683">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Extending Video Masked Autoencoders to 128 frames </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gundavarapu,+N+B">Nitesh Bharadwaj Gundavarapu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Friedman,+L">Luke Friedman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Goyal,+R">Raghav Goyal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hegde,+C">Chaitra Hegde</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Agustsson,+E">Eirikur Agustsson</a>, 
<a href="https://arxiv.org/search/cs?searchtype=author&query=Waghmare,+S+M">Sagar M. Waghmare</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sirotenko,+M">Mikhail Sirotenko</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+M">Ming-Hsuan Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Weyand,+T">Tobias Weyand</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gong,+B">Boqing Gong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sigal,+L">Leonid Sigal</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10.5 pages of main paper, 25 pages total, 4 figures and 10 tables. To appear in NeurIPS'24 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Video understanding has witnessed significant progress with recent video foundation models demonstrating strong performance owing to self-supervised pre-training objectives; Masked Autoencoders (MAE) being the design of choice. Nevertheless, the majority of prior works that leverage MAE pre-training have focused on relatively short video representations (16 / 32 frames in length) largely due to hardware memory and compute limitations that scale poorly with video length due to the dense memory-intensive self-attention decoding. One natural strategy to address these challenges is to subsample tokens to reconstruct during decoding (or decoder masking). In this work, we propose an effective strategy for prioritizing tokens which allows training on longer video sequences (128 frames) and gets better performance than, more typical, random and uniform masking strategies. The core of our approach is an adaptive decoder masking strategy that prioritizes the most important tokens and uses quantized tokens as reconstruction objectives. 
Our adaptive strategy leverages a powerful MAGVIT-based tokenizer that jointly learns the tokens and their priority. We validate our design choices through exhaustive ablations and observe improved performance of the resulting long-video (128 frames) encoders over short-video (32 frames) counterparts. With our long-video masked autoencoder (LVMAE) strategy, we surpass state-of-the-art on Diving48 by 3.9 points and EPIC-Kitchens-100 verb classification by 2.5 points while relying on a simple core architecture and video-only pre-training (unlike some of the prior works that require millions of labeled video-text pairs or specialized encoders). </p> </div> </dd> <dt> <a name='item50'>[50]</a> <a href ="/abs/2411.13687" title="Abstract" id="2411.13687"> arXiv:2411.13687 </a> [<a href="/pdf/2411.13687" title="Download PDF" id="pdf-2411.13687" aria-labelledby="pdf-2411.13687">pdf</a>, <a href="https://arxiv.org/html/2411.13687v1" title="View HTML" id="html-2411.13687" aria-labelledby="html-2411.13687" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13687" title="Other formats" id="oth-2411.13687" aria-labelledby="oth-2411.13687">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hierarchical Text Classification (HTC) vs. 
eXtreme Multilabel Classification (XML): Two Sides of the Same Medal </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bertalis,+N">Nerijus Bertalis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Granse,+P">Paul Granse</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=G%C3%BCl,+F">Ferhat Gül</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hauss,+F">Florian Hauss</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Menkel,+L">Leon Menkel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sch%C3%BCler,+D">David Schüler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Speier,+T">Tom Speier</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Galke,+L">Lukas Galke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Scherp,+A">Ansgar Scherp</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> Assigning a subset of labels from a fixed pool of labels to a given input text is a text classification problem with many real-world applications, such as in recommender systems. Two separate research streams address this issue. Hierarchical Text Classification (HTC) focuses on datasets with smaller label pools of hundreds of entries, accompanied by a semantic label hierarchy. In contrast, eXtreme Multi-Label Text Classification (XML) considers very large label pools with up to millions of entries, in which the labels are not arranged in any particular manner. However, in XML, a common approach is to construct an artificial hierarchy without any semantic information before or during the training process. Here, we investigate how state-of-the-art models from one domain perform when trained and tested on datasets from the other domain. 
The HBGL and HGLCR models from the HTC domain are trained and tested on the datasets Wiki10-31K, AmazonCat-13K, and Amazon-670K from the XML domain. On the other side, the XML models CascadeXML and XR-Transformer are trained and tested on the datasets Web of Science, The New York Times Annotated Corpus, and RCV1-V2 from the HTC domain. HTC models, on the other hand, are not equipped to handle the size of XML datasets and achieve poor transfer results. The code and numerous files that are needed to reproduce our results can be obtained from <a href="https://github.com/FloHauss/XMC_HTC" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item51'>[51]</a> <a href ="/abs/2411.13688" title="Abstract" id="2411.13688"> arXiv:2411.13688 </a> [<a href="/pdf/2411.13688" title="Download PDF" id="pdf-2411.13688" aria-labelledby="pdf-2411.13688">pdf</a>, <a href="/format/2411.13688" title="Other formats" id="oth-2411.13688" aria-labelledby="oth-2411.13688">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Investigating Graph Neural Networks and Classical Feature-Extraction Techniques in Activity-Cliff and Molecular Property Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Dablander,+M">Markus Dablander</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Doctoral Thesis (Mathematical Institute, University of Oxford) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Biomolecules (q-bio.BM); Machine Learning (stat.ML) </div> <p class='mathjax'> Molecular featurisation refers to the transformation of molecular data into numerical feature vectors. It is one of the key research areas in molecular machine learning and computational drug discovery. 
Recently, message-passing graph neural networks (GNNs) have emerged as a novel method to learn differentiable features directly from molecular graphs. While such techniques hold great promise, further investigations are needed to clarify if and when they indeed manage to definitively outcompete classical molecular featurisations such as extended-connectivity fingerprints (ECFPs) and physicochemical-descriptor vectors (PDVs). We systematically explore and further develop classical and graph-based molecular featurisation methods for two important tasks: molecular property prediction, in particular, quantitative structure-activity relationship (QSAR) prediction, and the largely unexplored challenge of activity-cliff (AC) prediction. We first give a technical description and critical analysis of PDVs, ECFPs and message-passing GNNs, with a focus on graph isomorphism networks (GINs). We then conduct a rigorous computational study to compare the performance of PDVs, ECFPs and GINs for QSAR and AC-prediction. Following this, we mathematically describe and computationally evaluate a novel twin neural network model for AC-prediction. We further introduce an operation called substructure pooling for the vectorisation of structural fingerprints as a natural counterpart to graph pooling in GNN architectures. We go on to propose Sort &amp; Slice, a simple substructure-pooling technique for ECFPs that robustly outperforms hash-based folding at molecular property prediction. Finally, we outline two ideas for future research: (i) a graph-based self-supervised learning strategy to make classical molecular featurisations trainable, and (ii) trainable substructure-pooling via differentiable self-attention. 
</p> </div> </dd> <dt> <a name='item52'>[52]</a> <a href ="/abs/2411.13690" title="Abstract" id="2411.13690"> arXiv:2411.13690 </a> [<a href="/pdf/2411.13690" title="Download PDF" id="pdf-2411.13690" aria-labelledby="pdf-2411.13690">pdf</a>, <a href="https://arxiv.org/html/2411.13690v1" title="View HTML" id="html-2411.13690" aria-labelledby="html-2411.13690" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13690" title="Other formats" id="oth-2411.13690" aria-labelledby="oth-2411.13690">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi-Agent Best Arm Identification in Stochastic Linear Bandits </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Agrawal,+S">Sanjana Agrawal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Blanco,+S+A">Saúl A. Blanco</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> We study the problem of collaborative best-arm identification in stochastic linear bandits under a fixed-budget scenario. In our learning model, we consider multiple agents connected through a star network or a generic network, interacting with a linear bandit instance in parallel. The objective of the agents is to collaboratively learn the best arm of the given bandit instance with the help of a central server while minimizing the probability of error in best arm estimation. For this purpose, we devise the algorithms MaLinBAI-Star and MaLinBAI-Gen for star networks and generic networks respectively. Both algorithms employ an Upper-Confidence-Bound approach where agents share their knowledge through the central server during each communication round. We demonstrate, both theoretically and empirically, that our algorithms enjoy exponentially decaying probability of error in the allocated time budget. 
Furthermore, experimental results based on synthetic and real-world data validate the effectiveness of our algorithms over the existing multi-agent algorithms. </p> </div> </dd> <dt> <a name='item53'>[53]</a> <a href ="/abs/2411.13691" title="Abstract" id="2411.13691"> arXiv:2411.13691 </a> [<a href="/pdf/2411.13691" title="Download PDF" id="pdf-2411.13691" aria-labelledby="pdf-2411.13691">pdf</a>, <a href="https://arxiv.org/html/2411.13691v1" title="View HTML" id="html-2411.13691" aria-labelledby="html-2411.13691" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13691" title="Other formats" id="oth-2411.13691" aria-labelledby="oth-2411.13691">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Retrieval-Augmented Generation for Domain-Specific Question Answering: A Case Study on Pittsburgh and CMU </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+H">Haojia Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yaqi Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+S">Shuting Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> We designed a Retrieval-Augmented Generation (RAG) system to provide large language models with relevant documents for answering domain-specific questions about Pittsburgh and Carnegie Mellon University (CMU). We extracted over 1,800 subpages using a greedy scraping strategy and employed a hybrid annotation process, combining manual and Mistral-generated question-answer pairs, achieving an inter-annotator agreement (IAA) score of 0.7625. Our RAG framework integrates BM25 and FAISS retrievers, enhanced with a reranker for improved document retrieval accuracy. 
Experimental results show that the RAG system significantly outperforms a non-RAG baseline, particularly in time-sensitive and complex queries, with an F1 score improvement from 5.45% to 42.21% and recall of 56.18%. This study demonstrates the potential of RAG systems in enhancing answer precision and relevance, while identifying areas for further optimization in document retrieval and model training. </p> </div> </dd> <dt> <a name='item54'>[54]</a> <a href ="/abs/2411.13693" title="Abstract" id="2411.13693"> arXiv:2411.13693 </a> [<a href="/pdf/2411.13693" title="Download PDF" id="pdf-2411.13693" aria-labelledby="pdf-2411.13693">pdf</a>, <a href="https://arxiv.org/html/2411.13693v1" title="View HTML" id="html-2411.13693" aria-labelledby="html-2411.13693" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13693" title="Other formats" id="oth-2411.13693" aria-labelledby="oth-2411.13693">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PairSonic: Helping Groups Securely Exchange Contact Information </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Putz,+F">Florentin Putz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Haesler,+S">Steffen Haesler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=V%C3%B6lkl,+T">Thomas Völkl</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gehring,+M">Maximilian Gehring</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rollshausen,+N">Nils Rollshausen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hollick,+M">Matthias Hollick</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 3 pages, 1 figure, the source code is available at <a href="https://github.com/seemoo-lab/pairsonic" rel="external noopener nofollow" class="link-external link-https">this https URL</a> and a demo video is available 
at <a href="https://www.youtube.com/watch?v=e1AMYDLWN0E" rel="external noopener nofollow" class="link-external link-https">this https URL</a> and further project information is at <a href="https://fputz.net/pairsonic" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> ACM CSCW 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Human-Computer Interaction (cs.HC); Networking and Internet Architecture (cs.NI) </div> <p class='mathjax'> Securely exchanging contact information is essential for establishing trustworthy communication channels that facilitate effective online collaboration. However, current methods are neither user-friendly nor scalable for large groups of users. In response, we introduce PairSonic, a novel group pairing protocol that extends trust from physical encounters to online communication. PairSonic simplifies the pairing process by automating the tedious verification tasks of previous methods through an acoustic out-of-band channel using smartphones' built-in hardware. Our protocol not only facilitates connecting users for computer-supported collaboration, but also provides a more user-friendly and scalable solution to the authentication ceremonies currently used in end-to-end encrypted messengers like Signal or WhatsApp. 
PairSonic is available as open-source software: <a href="https://github.com/seemoo-lab/pairsonic" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item55'>[55]</a> <a href ="/abs/2411.13694" title="Abstract" id="2411.13694"> arXiv:2411.13694 </a> [<a href="/pdf/2411.13694" title="Download PDF" id="pdf-2411.13694" aria-labelledby="pdf-2411.13694">pdf</a>, <a href="https://arxiv.org/html/2411.13694v1" title="View HTML" id="html-2411.13694" aria-labelledby="html-2411.13694" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13694" title="Other formats" id="oth-2411.13694" aria-labelledby="oth-2411.13694">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sounds Good? Fast and Secure Contact Exchange in Groups </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Putz,+F">Florentin Putz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Haesler,+S">Steffen Haesler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hollick,+M">Matthias Hollick</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 44 pages, 8 figures, the dataset is available at <a href="https://doi.org/10.5281/zenodo.13324112" rel="external noopener nofollow" class="link-external link-https">this https URL</a> and a demo video is available at <a href="https://www.youtube.com/watch?v=e1AMYDLWN0E" rel="external noopener nofollow" class="link-external link-https">this https URL</a> and further project information is at <a href="https://fputz.net/pairsonic" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Proc. ACM Hum.-Comput. Interact. 
8, CSCW2, Article 425 (November 2024), 44 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span>; Cryptography and Security (cs.CR); Networking and Internet Architecture (cs.NI) </div> <p class='mathjax'> Trustworthy digital communication requires the secure exchange of contact information, but current approaches lack usability and scalability for larger groups of users. We evaluate the usability of two secure contact exchange systems: the current state of the art, SafeSlinger, and our newly designed protocol, PairSonic, which extends trust from physical encounters to spontaneous online communication. Our lab study (N=45) demonstrates PairSonic's superior usability, automating the tedious verification tasks from previous approaches via an acoustic out-of-band channel. Although participants significantly preferred our system, minimizing user effort surprisingly decreased the perceived security for some users, who associated security with complexity. We discuss user perceptions of the different protocol components and identify remaining usability barriers for CSCW application scenarios. 
</p> </div> </dd> <dt> <a name='item56'>[56]</a> <a href ="/abs/2411.13697" title="Abstract" id="2411.13697"> arXiv:2411.13697 </a> [<a href="/pdf/2411.13697" title="Download PDF" id="pdf-2411.13697" aria-labelledby="pdf-2411.13697">pdf</a>, <a href="/format/2411.13697" title="Other formats" id="oth-2411.13697" aria-labelledby="oth-2411.13697">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Decompose and Leverage Preferences from Expert Models for Improving Trustworthiness of MLLMs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+R">Rui Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+Y">Yuming Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schlichtkrull,+M">Michael Schlichtkrull</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vlachos,+A">Andreas Vlachos</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Multimodal Large Language Models (MLLMs) can enhance trustworthiness by aligning with human preferences. As human preference labeling is laborious, recent works employ evaluation models for assessing MLLMs' responses, using the model-based assessments to automate preference dataset construction. This approach, however, faces challenges with MLLMs' lengthy and compositional responses, which often require diverse reasoning skills that a single evaluation model may not fully possess. Additionally, most existing methods rely on closed-source models as evaluators. To address limitations, we propose DecompGen, a decomposable framework that uses an ensemble of open-sourced expert models. DecompGen breaks down each response into atomic verification tasks, assigning each task to an appropriate expert model to generate fine-grained assessments. 
The DecompGen feedback is used to automatically construct our preference dataset, DGPref. MLLMs aligned with DGPref via preference learning show improvements in trustworthiness, demonstrating the effectiveness of DecompGen. </p> </div> </dd> <dt> <a name='item57'>[57]</a> <a href ="/abs/2411.13699" title="Abstract" id="2411.13699"> arXiv:2411.13699 </a> [<a href="/pdf/2411.13699" title="Download PDF" id="pdf-2411.13699" aria-labelledby="pdf-2411.13699">pdf</a>, <a href="/format/2411.13699" title="Other formats" id="oth-2411.13699" aria-labelledby="oth-2411.13699">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Test Security in Remote Testing Age: Perspectives from Process Data Analytics and AI </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hao,+J">Jiangang Hao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fauss,+M">Michael Fauss</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 23 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Computation and Language (cs.CL); Human-Computer Interaction (cs.HC) </div> <p class='mathjax'> The COVID-19 pandemic has accelerated the implementation and acceptance of remotely proctored high-stake assessments. While the flexible administration of the tests brings forth many values, it raises test security-related concerns. Meanwhile, artificial intelligence (AI) has witnessed tremendous advances in the last five years. Many AI tools (such as the very recent ChatGPT) can generate high-quality responses to test items. These new developments require test security research beyond the statistical analysis of scores and response time. 
Data analytics and AI methods based on clickstream process data can get us deeper insight into the test-taking process and hold great promise for securing remotely administered high-stakes tests. This chapter uses real-world examples to show that this is indeed the case. </p> </div> </dd> <dt> <a name='item58'>[58]</a> <a href ="/abs/2411.13700" title="Abstract" id="2411.13700"> arXiv:2411.13700 </a> [<a href="/pdf/2411.13700" title="Download PDF" id="pdf-2411.13700" aria-labelledby="pdf-2411.13700">pdf</a>, <a href="https://arxiv.org/html/2411.13700v1" title="View HTML" id="html-2411.13700" aria-labelledby="html-2411.13700" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13700" title="Other formats" id="oth-2411.13700" aria-labelledby="oth-2411.13700">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Collaborative Ensemble Framework for CTR Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+X">Xiaolong Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+Z">Zhichen Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+X">Xiaoyi Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yuan,+S">Siyang Yuan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+W">Weinan Song</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hang,+M">Mengyue Hang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yiqun Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+C">Chaofei Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+D">Donghyun Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+W">Wen-Yen Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+J">Jiyan Yang</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Han,+Y">Yiping Han</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+R">Rong Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Long,+B">Bo Long</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tong,+H">Hanghang Tong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+P+S">Philip S. Yu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Retrieval (cs.IR)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Recent advances in foundation models have established scaling laws that enable the development of larger models to achieve enhanced performance, motivating extensive research into large-scale recommendation models. However, simply increasing the model size in recommendation systems, even with large amounts of data, does not always result in the expected performance improvements. In this paper, we propose a novel framework, Collaborative Ensemble Training Network (CETNet), to leverage multiple distinct models, each with its own embedding table, to capture unique feature interaction patterns. Unlike naive model scaling, our approach emphasizes diversity and collaboration through collaborative learning, where models iteratively refine their predictions. To dynamically balance contributions from each model, we introduce a confidence-based fusion mechanism using general softmax, where model confidence is computed via negation entropy. This design ensures that more confident models have a greater influence on the final prediction while benefiting from the complementary strengths of other models. We validate our framework on three public datasets (AmazonElectronics, TaobaoAds, and KuaiVideo) as well as a large-scale industrial dataset from Meta, demonstrating its superior performance over individual models and state-of-the-art baselines. 
Additionally, we conduct further experiments on the Criteo and Avazu datasets to compare our method with the multi-embedding paradigm. Our results show that our framework achieves comparable or better performance with smaller embedding sizes, offering a scalable and efficient solution for CTR prediction tasks. </p> </div> </dd> <dt> <a name='item59'>[59]</a> <a href ="/abs/2411.13704" title="Abstract" id="2411.13704"> arXiv:2411.13704 </a> [<a href="/pdf/2411.13704" title="Download PDF" id="pdf-2411.13704" aria-labelledby="pdf-2411.13704">pdf</a>, <a href="https://arxiv.org/html/2411.13704v1" title="View HTML" id="html-2411.13704" aria-labelledby="html-2411.13704" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13704" title="Other formats" id="oth-2411.13704" aria-labelledby="oth-2411.13704">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Query Optimizer as a Service (QOaaS) in a Unified LakeHouse Ecosystem: Can One QO Rule Them All? 
</div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Alotaibi,+R">Rana Alotaibi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tian,+Y">Yuanyuan Tian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Grafberger,+S">Stefan Grafberger</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Camacho-Rodr%C3%ADguez,+J">Jesús Camacho-Rodríguez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bruno,+N">Nicolas Bruno</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kroth,+B">Brian Kroth</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Matusevych,+S">Sergiy Matusevych</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Agrawal,+A">Ashvin Agrawal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Behera,+M">Mahesh Behera</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gosalia,+A">Ashit Gosalia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Galindo-Legaria,+C">Cesar Galindo-Legaria</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Joshi,+M">Milind Joshi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Potocnik,+M">Milan Potocnik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sezgin,+B">Beysim Sezgin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xiaoyu Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Curino,+C">Carlo Curino</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span> </div> <p class='mathjax'> Customer demand, regulatory pressure, and engineering efficiency are the driving forces behind the industry-wide trend of moving from siloed engines and services that are optimized in isolation to highly integrated solutions. 
This is confirmed by the wide adoption of open formats, shared component libraries, and the meteoric success of integrated data lake experiences such as Microsoft Fabric. <br>In this paper, we study the implications of this trend to Query Optimizer (QO) and discuss our experience of building Calcite and extending Cascades into QO components of Microsoft SQL Server, Fabric Data Warehouse (DW), and SCOPE. We weigh the pros and cons of a drastic change in direction: moving from bespoke QOs or library-sharing (à la Calcite) to rewriting the QO stack and fully embracing Query Optimizer as a Service (QOaaS). We report on some early successes and stumbles as we explore these ideas with prototypes compatible with Fabric DW and Spark. The benefits include centralized workload-level optimizations, multi-engine federation, and accelerated feature creation, but the challenges are equally daunting. We plan to engage CIDR audience in a debate on this exciting topic. </p> </div> </dd> <dt> <a name='item60'>[60]</a> <a href ="/abs/2411.13708" title="Abstract" id="2411.13708"> arXiv:2411.13708 </a> [<a href="/pdf/2411.13708" title="Download PDF" id="pdf-2411.13708" aria-labelledby="pdf-2411.13708">pdf</a>, <a href="https://arxiv.org/html/2411.13708v1" title="View HTML" id="html-2411.13708" aria-labelledby="html-2411.13708" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13708" title="Other formats" id="oth-2411.13708" aria-labelledby="oth-2411.13708">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Comments on "$\mathcal{O}(m\cdot n)$ algorithms for the recognition and isomorphism problems on circular-arc graphs" </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Krawczyk,+T">Tomasz Krawczyk</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Comment on doi:<a href="https://doi.org/10.1137/S0097539793260726" 
data-doi="10.1137/S0097539793260726" class="link-https link-external" rel="external noopener nofollow">https://doi.org/10.1137/S0097539793260726</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Data Structures and Algorithms (cs.DS)</span>; Combinatorics (math.CO) </div> <p class='mathjax'> In the work [$\mathcal{O}(m\cdot n)$ algorithms for the recognition and isomorphism problems on circular-arc graphs, SIAM J. Comput. 24(3), 411–439, (1995)], Wen-Lian Hsu claims three results concerning the class of circular-arc graphs: - the design of so-called <em>decomposition trees</em> that represent the structure of all normalized intersection models of circular-arc graphs, - an $\mathcal{O}(m\cdot n)$ recognition algorithm for circular-arc graphs, - an $\mathcal{O}(m\cdot n)$ isomorphism algorithm for circular-arc graphs. In [Discrete Math. Theor. Comput. Sci., 15(1), 157–182, 2013] Curtis, Lin, McConnell, Nussbaum, Soulignac, Spinrad, and Szwarcfiter showed that Hsu's isomorphism algorithm is incorrect. In this note, we show that the other two results — namely, the construction of decomposition trees and the recognition algorithm — are also flawed. 
</p> </div> </dd> <dt> <a name='item61'>[61]</a> <a href ="/abs/2411.13710" title="Abstract" id="2411.13710"> arXiv:2411.13710 </a> [<a href="/pdf/2411.13710" title="Download PDF" id="pdf-2411.13710" aria-labelledby="pdf-2411.13710">pdf</a>, <a href="/format/2411.13710" title="Other formats" id="oth-2411.13710" aria-labelledby="oth-2411.13710">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Assessing the Impact of Electric Vehicle Charging on Residential Distribution Grids </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Raffoul,+E">Elias Raffoul</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+X">Xingpeng Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> To achieve net-zero carbon emissions, electrification in the transportation sector plays an important role. Significant increase of electric vehicles (EV) has been observed nationally and globally. While the transition to EVs presents substantial environmental benefits, it would lead to several challenges to the power grid due to EV charging activities. Growing EVs greatly increase peak loads on residential grids, particularly during evening charging periods. This surge can result in operational challenges, including greater voltage drops, increased power losses, and potential overloading violations, compromising grid reliability and efficiency. This study focuses on determining ampacity violations, and analyzing line loading levels in a 240-bus distribution system with 1120 customers, located in the Midwest U.S. 
By simulating a range of charging scenarios and evaluating EV chargers with varying power capacities under different distribution system voltage levels, this research aims to identify lines at risk of ampacity violations for various EV charging penetration rates up to 100%. The findings will provide valuable insights for utilities and grid operators, informing strategies for voltage level adjustments and necessary infrastructure reinforcements to effectively accommodate the growing energy demands associated with widespread EV adoption. </p> </div> </dd> <dt> <a name='item62'>[62]</a> <a href ="/abs/2411.13711" title="Abstract" id="2411.13711"> arXiv:2411.13711 </a> [<a href="/pdf/2411.13711" title="Download PDF" id="pdf-2411.13711" aria-labelledby="pdf-2411.13711">pdf</a>, <a href="https://arxiv.org/html/2411.13711v1" title="View HTML" id="html-2411.13711" aria-labelledby="html-2411.13711" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13711" title="Other formats" id="oth-2411.13711" aria-labelledby="oth-2411.13711">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Almost Sure Convergence Rates and Concentration of Stochastic Approximation and Reinforcement Learning with Markovian Noise </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Qian,+X">Xiaochi Qian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+Z">Zixuan Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+X">Xinyu Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+S">Shangtong Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Optimization and Control (math.OC); Machine Learning (stat.ML) </div> <p class='mathjax'> This paper establishes the first almost sure convergence rate and the first maximal 
concentration bound with exponential tails for general contractive stochastic approximation algorithms with Markovian noise. As a corollary, we also obtain convergence rates in $L^p$. Key to our successes is a novel discretization of the mean ODE of stochastic approximation algorithms using intervals with diminishing (instead of constant) length. As applications, we provide the first almost sure convergence rate for $Q$-learning with Markovian samples without count-based learning rates. We also provide the first concentration bound for off-policy temporal difference learning with Markovian samples. </p> </div> </dd> <dt> <a name='item63'>[63]</a> <a href ="/abs/2411.13716" title="Abstract" id="2411.13716"> arXiv:2411.13716 </a> [<a href="/pdf/2411.13716" title="Download PDF" id="pdf-2411.13716" aria-labelledby="pdf-2411.13716">pdf</a>, <a href="https://arxiv.org/html/2411.13716v1" title="View HTML" id="html-2411.13716" aria-labelledby="html-2411.13716" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13716" title="Other formats" id="oth-2411.13716" aria-labelledby="oth-2411.13716">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Developing Normative Gait Cycle Parameters for Clinical Analysis Using Human Pose Estimation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ranjan,+R">Rahm Ranjan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ahmedt-Aristizabal,+D">David Ahmedt-Aristizabal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Armin,+M+A">Mohammad Ali Armin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+J">Juno Kim</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Gait analysis using computer vision is an emerging field in AI, 
offering clinicians an objective, multi-feature approach to analyse complex movements. Despite its promise, current applications using RGB video data alone are limited in measuring clinically relevant spatial and temporal kinematics and establishing normative parameters essential for identifying movement abnormalities within a gait cycle. This paper presents a data-driven method using RGB video data and 2D human pose estimation for developing normative kinematic gait parameters. By analysing joint angles, an established kinematic measure in biomechanics and clinical practice, we aim to enhance gait analysis capabilities and improve explainability. Our cycle-wise kinematic analysis enables clinicians to simultaneously measure and compare multiple joint angles, assessing individuals against a normative population using just monocular RGB video. This approach expands clinical capacity, supports objective decision-making, and automates the identification of specific spatial and temporal deviations and abnormalities within the gait cycle. 
</p> </div> </dd> <dt> <a name='item64'>[64]</a> <a href ="/abs/2411.13717" title="Abstract" id="2411.13717"> arXiv:2411.13717 </a> [<a href="/pdf/2411.13717" title="Download PDF" id="pdf-2411.13717" aria-labelledby="pdf-2411.13717">pdf</a>, <a href="/format/2411.13717" title="Other formats" id="oth-2411.13717" aria-labelledby="oth-2411.13717">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hardware Accelerators for Artificial Intelligence </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ahsan,+S+M+M">S M Mojahidul Ahsan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dhungel,+A">Anurag Dhungel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chowdhury,+M">Mrittika Chowdhury</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hasan,+M+S">Md Sakib Hasan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hoque,+T">Tamzidul Hoque</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> The book chapter is a part of the Book, "AI-Enabled Electronic Circuit and System Design" with ISBN 978-3-031-71435-1 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Hardware Architecture (cs.AR)</span>; Emerging Technologies (cs.ET) </div> <p class='mathjax'> In this chapter, we aim to explore an in-depth exploration of the specialized hardware accelerators designed to enhance Artificial Intelligence (AI) applications, focusing on their necessity, development, and impact on the field of AI. It covers the transition from traditional computing systems to advanced AI-specific hardware, addressing the growing demands of AI algorithms and the inefficiencies of conventional architectures. The discussion extends to various types of accelerators, including GPUs, FPGAs, and ASICs, and their roles in optimizing AI workloads. 
Additionally, it touches on the challenges and considerations in designing and implementing these accelerators, along with future prospects in the evolution of AI hardware. This comprehensive overview aims to equip readers with a clear understanding of the current landscape and future directions in AI hardware development, making it accessible to both experts and newcomers to the field. </p> </div> </dd> <dt> <a name='item65'>[65]</a> <a href ="/abs/2411.13720" title="Abstract" id="2411.13720"> arXiv:2411.13720 </a> [<a href="/pdf/2411.13720" title="Download PDF" id="pdf-2411.13720" aria-labelledby="pdf-2411.13720">pdf</a>, <a href="/format/2411.13720" title="Other formats" id="oth-2411.13720" aria-labelledby="oth-2411.13720">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Distortion of Multi-Winner Elections on the Line Metric: The Polar Comparison Rule </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Babashah,+N">Negar Babashah</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Karimi,+H">Hasti Karimi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Seddighin,+M">Masoud Seddighin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shahkarami,+G">Golnoosh Shahkarami</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Science and Game Theory (cs.GT)</span> </div> <p class='mathjax'> We consider the problem of selecting a committee of $k$ alternatives among $m$ alternatives, based on the ordinal rank list of voters. Our focus is on the case where both voters and alternatives lie on a metric space—specifically, on the line—and the objective is to minimize the additive social cost. 
The additive social cost is the sum of the costs for all voters, where the cost for each voter is defined as the sum of their distances to each member of the selected committee. <br>We propose a new voting rule, the Polar Comparison Rule, which achieves upper bounds of $1 + \sqrt{2} \approx 2.41$ and $7/3 \approx 2.33$ distortions for $k = 2$ and $k = 3$, respectively, and we show that these bounds are tight. Furthermore, we generalize this rule, showing that it maintains a distortion of roughly $7/3$ based on the remainder of the committee size when divided by three. We also establish lower bounds on the achievable distortion based on the parity of $k$ and for both small and large committee sizes. </p> </div> </dd> <dt> <a name='item66'>[66]</a> <a href ="/abs/2411.13722" title="Abstract" id="2411.13722"> arXiv:2411.13722 </a> [<a href="/pdf/2411.13722" title="Download PDF" id="pdf-2411.13722" aria-labelledby="pdf-2411.13722">pdf</a>, <a href="https://arxiv.org/html/2411.13722v1" title="View HTML" id="html-2411.13722" aria-labelledby="html-2411.13722" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13722" title="Other formats" id="oth-2411.13722" aria-labelledby="oth-2411.13722">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Systematic Literature Review on a Decade of Industrial TLA+ Practice </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=B%C3%B6gli,+R">Roman Bögli</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lerena,+L">Leandro Lerena</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tsigkanos,+C">Christos Tsigkanos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kehrer,+T">Timo Kehrer</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> for accompanying data, see <a href="https://zenodo.org/records/13629185" rel="external noopener 
nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Integrated Formal Methods, IFM 2024, LNCS 15234 (2025), pp 24-34 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span> </div> <p class='mathjax'> TLA+ is a formal specification language used for designing, modeling, documenting, and verifying systems through model checking. Despite significant interest from the research community, knowledge about usage of the TLA+ ecosystem in practice remains scarce. Industry reports suggest that software engineers could benefit from insights, innovations, and solutions to the practical challenges of TLA+. This paper explores this development by conducting a systematic literature review of TLA+'s industrial usage over the past decade. We analyze the trend in industrial application, characterize its use, examine whether its promised benefits resonate with practitioners, and identify challenges that may hinder further adoption. </p> </div> </dd> <dt> <a name='item67'>[67]</a> <a href ="/abs/2411.13724" title="Abstract" id="2411.13724"> arXiv:2411.13724 </a> [<a href="/pdf/2411.13724" title="Download PDF" id="pdf-2411.13724" aria-labelledby="pdf-2411.13724">pdf</a>, <a href="/format/2411.13724" title="Other formats" id="oth-2411.13724" aria-labelledby="oth-2411.13724">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Exploring Large Language Models for Climate Forecasting </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yang Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Karimi,+H+A">Hassan A. 
Karimi</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> With the increasing impacts of climate change, there is a growing demand for accessible tools that can provide reliable future climate information to support planning, finance, and other decision-making applications. Large language models (LLMs), such as GPT-4, present a promising approach to bridging the gap between complex climate data and the general public, offering a way for non-specialist users to obtain essential climate insights through natural language interaction. However, an essential challenge remains under-explored: evaluating the ability of LLMs to provide accurate and reliable future climate predictions, which is crucial for applications that rely on anticipating climate trends. In this study, we investigate the capability of GPT-4 in predicting rainfall at short-term (15-day) and long-term (12-month) scales. We designed a series of experiments to assess GPT's performance under different conditions, including scenarios with and without expert data inputs. Our results indicate that GPT, when operating independently, tends to generate conservative forecasts, often reverting to historical averages in the absence of clear trend signals. This study highlights both the potential and challenges of applying LLMs for future climate predictions, providing insights into their integration with climate-related applications and suggesting directions for enhancing their predictive capabilities in the field. 
</p> </div> </dd> <dt> <a name='item68'>[68]</a> <a href ="/abs/2411.13728" title="Abstract" id="2411.13728"> arXiv:2411.13728 </a> [<a href="/pdf/2411.13728" title="Download PDF" id="pdf-2411.13728" aria-labelledby="pdf-2411.13728">pdf</a>, <a href="https://arxiv.org/html/2411.13728v1" title="View HTML" id="html-2411.13728" aria-labelledby="html-2411.13728" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13728" title="Other formats" id="oth-2411.13728" aria-labelledby="oth-2411.13728">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Distributed Distance Sensitivity Oracles </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Manoharan,+V">Vignesh Manoharan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ramachandran,+V">Vijaya Ramachandran</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Data Structures and Algorithms (cs.DS)</span> </div> <p class='mathjax'> We present results for the distance sensitivity oracle (DSO) problem, where one needs to preprocess a given directed weighted graph $G=(V,E)$ in order to answer queries about the shortest path distance from $s$ to $t$ in $G$ that avoids edge $e$, for any $s,t \in V, e \in E$. No non-trivial results are known for DSO in the distributed CONGEST model even though it is of importance to maintain efficient communication under an edge failure. <br>Let $n=|V|$, and let $D$ be the undirected diameter of $G$. Our first DSO algorithm optimizes query response rounds and can answer a batch of any $k\geq 1$ queries in $O(k+D)$ rounds after taking $\tilde{O}(n^{3/2})$ rounds to preprocess $G$. Our second algorithm takes $\tilde{O}(n)$ rounds for preprocessing, and then it can answer any batch of $k\geq 1$ queries in $\tilde{O}(k\sqrt{n}+D)$ rounds. 
We complement these algorithms with some unconditional CONGEST lower bounds that give trade-offs between preprocessing rounds and rounds needed to answer queries. <br>Additionally, we present almost-optimal upper and lower bounds for the related all pairs second simple shortest path (2-APSiSP) problem, where for all pairs of vertices $x,y \in V$, we need to compute the minimum weight of a simple $x$-$y$ path that differs from the precomputed $x$-$y$ shortest path by at least one edge. </p> </div> </dd> <dt> <a name='item69'>[69]</a> <a href ="/abs/2411.13730" title="Abstract" id="2411.13730"> arXiv:2411.13730 </a> [<a href="/pdf/2411.13730" title="Download PDF" id="pdf-2411.13730" aria-labelledby="pdf-2411.13730">pdf</a>, <a href="https://arxiv.org/html/2411.13730v1" title="View HTML" id="html-2411.13730" aria-labelledby="html-2411.13730" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13730" title="Other formats" id="oth-2411.13730" aria-labelledby="oth-2411.13730">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Replicable Online Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ahmadi,+S">Saba Ahmadi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bhandari,+S">Siddharth Bhandari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Blum,+A">Avrim Blum</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> We investigate the concept of algorithmic replicability introduced by Impagliazzo et al. 2022, Ghazi et al. 2021, Ahn et al. 2024 in an online setting. In our model, the input sequence received by the online learner is generated from time-varying distributions chosen by an adversary (obliviously). 
Our objective is to design low-regret online algorithms that, with high probability, produce the exact same sequence of actions when run on two independently sampled input sequences generated as described above. We refer to such algorithms as adversarially replicable. Previous works (such as Esfandiari et al. 2022) explored replicability in the online setting under inputs generated independently from a fixed distribution; we term this notion as iid-replicability. Our model generalizes to capture both adversarial and iid input sequences, as well as their mixtures, which can be modeled by setting certain distributions as point-masses. <br>We demonstrate adversarially replicable online learning algorithms for online linear optimization and the experts problem that achieve sub-linear regret. Additionally, we propose a general framework for converting an online learner into an adversarially replicable one within our setting, bounding the new regret in terms of the original algorithm's regret. We also present a nearly optimal (in terms of regret) iid-replicable online algorithm for the experts problem, highlighting the distinction between the iid and adversarial notions of replicability. Finally, we establish lower bounds on the regret (in terms of the replicability parameter and time) that any replicable online algorithm must incur. 
</p> </div> </dd> <dt> <a name='item70'>[70]</a> <a href ="/abs/2411.13731" title="Abstract" id="2411.13731"> arXiv:2411.13731 </a> [<a href="/pdf/2411.13731" title="Download PDF" id="pdf-2411.13731" aria-labelledby="pdf-2411.13731">pdf</a>, <a href="https://arxiv.org/html/2411.13731v1" title="View HTML" id="html-2411.13731" aria-labelledby="html-2411.13731" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13731" title="Other formats" id="oth-2411.13731" aria-labelledby="oth-2411.13731">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Delta-Influence: Unlearning Poisons via Influence Functions </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+W">Wenjie Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+J">Jiawei Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=de+Witt,+C+S">Christian Schroeder de Witt</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Prabhu,+A">Ameya Prabhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sanyal,+A">Amartya Sanyal</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at NeurIPS Workshop on Attributing Model Behavior at Scale (ATTRIB @ NeurIPS 2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Cryptography and Security (cs.CR); Machine Learning (cs.LG) </div> <p class='mathjax'> Addressing data integrity challenges, such as unlearning the effects of data poisoning after model training, is necessary for the reliable deployment of machine learning models. State-of-the-art influence functions, such as EK-FAC, often fail to accurately attribute abnormal model behavior to the specific poisoned training data responsible for the data poisoning attack. 
In addition, traditional unlearning algorithms often struggle to effectively remove the influence of poisoned samples, particularly when only a few affected examples can be identified. To address these challenges, we introduce $\Delta$-Influence, a novel approach that leverages influence functions to trace abnormal model behavior back to the responsible poisoned training data using as little as just one poisoned test example. $\Delta$-Influence applies data transformations that sever the link between poisoned training data and compromised test points without significantly affecting clean data. This allows $\Delta$-Influence to detect large negative shifts in influence scores following data transformations, a phenomenon we term as influence collapse, thereby accurately identifying poisoned training data. Unlearning this subset, e.g. through retraining, effectively eliminates the data poisoning. We validate our method across three vision-based poisoning attacks and three datasets, benchmarking against four detection algorithms and five unlearning strategies. We show that $\Delta$-Influence consistently achieves the best unlearning across all settings, showing the promise of influence functions for corrective unlearning. 
Our code is publicly available at: <a href="https://github.com/andyisokay/delta-influence" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item71'>[71]</a> <a href ="/abs/2411.13732" title="Abstract" id="2411.13732"> arXiv:2411.13732 </a> [<a href="/pdf/2411.13732" title="Download PDF" id="pdf-2411.13732" aria-labelledby="pdf-2411.13732">pdf</a>, <a href="https://arxiv.org/html/2411.13732v1" title="View HTML" id="html-2411.13732" aria-labelledby="html-2411.13732" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13732" title="Other formats" id="oth-2411.13732" aria-labelledby="oth-2411.13732">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Typing Composite Subjects </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Aceto,+L">Luca Aceto</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gorla,+D">Daniele Gorla</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lybech,+S">Stian Lybech</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Programming Languages (cs.PL)</span> </div> <p class='mathjax'> Many type systems have been presented in the literature for variants of the pi-calculus, but none of them are able to handle composite subjects such as those found in the language epi, which features polyadic synchronisation. The purpose of this paper is to address the question of how to type composite subjects in a general fashion. We assess the validity of our proposal by first proving the standard correctness results for a type system (i.e., subject reduction and type safety). 
Then, we follow the path opened by Sangiorgi in 1998 and show an encoding in epi of a minimal OO language called WC (While with Classes) whose "expectable" type system exactly corresponds to the one induced by ours via the encoding. This comparison contributes to understanding the relationship between our types and conventional types for OO languages. </p> </div> </dd> <dt> <a name='item72'>[72]</a> <a href ="/abs/2411.13733" title="Abstract" id="2411.13733"> arXiv:2411.13733 </a> [<a href="/pdf/2411.13733" title="Download PDF" id="pdf-2411.13733" aria-labelledby="pdf-2411.13733">pdf</a>, <a href="https://arxiv.org/html/2411.13733v1" title="View HTML" id="html-2411.13733" aria-labelledby="html-2411.13733" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13733" title="Other formats" id="oth-2411.13733" aria-labelledby="oth-2411.13733">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On Generalization Bounds for Neural Networks with Low Rank Layers </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pinto,+A">Andrea Pinto</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rangamani,+A">Akshay Rangamani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Poggio,+T">Tomaso Poggio</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published in the MIT DSpace repository: <a href="https://dspace.mit.edu/handle/1721.1/157263" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> While previous optimization results have suggested that deep neural networks tend to favour low-rank weight matrices, the implications of this inductive bias on generalization bounds 
remain underexplored. In this paper, we apply Maurer's chain rule for Gaussian complexity to analyze how low-rank layers in deep networks can prevent the accumulation of rank and dimensionality factors that typically multiply across layers. This approach yields generalization bounds for rank and spectral norm constrained networks. We compare our results to prior generalization bounds for deep networks, highlighting how deep networks with low-rank layers can achieve better generalization than those with full-rank layers. Additionally, we discuss how this framework provides new perspectives on the generalization capabilities of deep networks exhibiting neural collapse. </p> </div> </dd> <dt> <a name='item73'>[73]</a> <a href ="/abs/2411.13738" title="Abstract" id="2411.13738"> arXiv:2411.13738 </a> [<a href="/pdf/2411.13738" title="Download PDF" id="pdf-2411.13738" aria-labelledby="pdf-2411.13738">pdf</a>, <a href="https://arxiv.org/html/2411.13738v1" title="View HTML" id="html-2411.13738" aria-labelledby="html-2411.13738" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13738" title="Other formats" id="oth-2411.13738" aria-labelledby="oth-2411.13738">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Assessing Gender Bias in LLMs: Comparing LLM Outputs with Human Perceptions and Official Statistics </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bas,+T">Tetiana Bas</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> under review for Coling conference </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> This study investigates gender bias in large language models (LLMs) by comparing their gender perception to that of human respondents, U.S. 
Bureau of Labor Statistics data, and a 50% no-bias benchmark. We created a new evaluation set using occupational data and role-specific sentences. Unlike common benchmarks included in LLM training data, our set is newly developed, preventing data leakage and test set contamination. Five LLMs were tested to predict the gender for each role using single-word answers. We used Kullback-Leibler (KL) divergence to compare model outputs with human perceptions, statistical data, and the 50% neutrality benchmark. All LLMs showed significant deviation from gender neutrality and aligned more with statistical data, still reflecting inherent biases. </p> </div> </dd> <dt> <a name='item74'>[74]</a> <a href ="/abs/2411.13740" title="Abstract" id="2411.13740"> arXiv:2411.13740 </a> [<a href="/pdf/2411.13740" title="Download PDF" id="pdf-2411.13740" aria-labelledby="pdf-2411.13740">pdf</a>, <a href="https://arxiv.org/html/2411.13740v1" title="View HTML" id="html-2411.13740" aria-labelledby="html-2411.13740" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13740" title="Other formats" id="oth-2411.13740" aria-labelledby="oth-2411.13740">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Federated Continual Learning for Edge-AI: A Comprehensive Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zi Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+F">Fei Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+F">Feng Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+Y">Yurui Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+J">Jia Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Min,+G">Geyong Min</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning 
(cs.LG)</span>; Artificial Intelligence (cs.AI); Distributed, Parallel, and Cluster Computing (cs.DC); Networking and Internet Architecture (cs.NI) </div> <p class='mathjax'> Edge-AI, the convergence of edge computing and artificial intelligence (AI), has become a promising paradigm that enables the deployment of advanced AI models at the network edge, close to users. In Edge-AI, federated continual learning (FCL) has emerged as an imperative framework, which fuses knowledge from different clients while preserving data privacy and retaining knowledge from previous tasks as it learns new ones. By so doing, FCL aims to ensure stable and reliable performance of learning models in dynamic and distributed environments. In this survey, we thoroughly review the state-of-the-art research and present the first comprehensive survey of FCL for Edge-AI. We categorize FCL methods based on three task characteristics: federated class continual learning, federated domain continual learning, and federated task continual learning. For each category, an in-depth investigation and review of the representative methods are provided, covering background, challenges, problem formalisation, solutions, and limitations. Besides, existing real-world applications empowered by FCL are reviewed, indicating the current progress and potential of FCL in diverse application domains. Furthermore, we discuss and highlight several prospective research directions of FCL such as algorithm-hardware co-design for FCL and FCL with foundation models, which could provide insights into the future development and practical deployment of FCL in the era of Edge-AI. 
</p> </div> </dd> <dt> <a name='item75'>[75]</a> <a href ="/abs/2411.13749" title="Abstract" id="2411.13749"> arXiv:2411.13749 </a> [<a href="/pdf/2411.13749" title="Download PDF" id="pdf-2411.13749" aria-labelledby="pdf-2411.13749">pdf</a>, <a href="/format/2411.13749" title="Other formats" id="oth-2411.13749" aria-labelledby="oth-2411.13749">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> AI-Driven Agents with Prompts Designed for High Agreeableness Increase the Likelihood of Being Mistaken for a Human in the Turing Test </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Le%C3%B3n-Dom%C3%ADnguez,+U">U. León-Domínguez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Flores-Flores,+E+D">E. D. Flores-Flores</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Garc%C3%ADa-Jasso,+A+J">A. J. García-Jasso</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=G%C3%B3mez-Cuellar,+M+K">M. K. Gómez-Cuellar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Torres-S%C3%A1nchez,+D">D. Torres-Sánchez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Basora-Marimon,+A">A. Basora-Marimon</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 25 pages, 2 figures, 7 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span> </div> <p class='mathjax'> Large Language Models based on transformer algorithms have revolutionized Artificial Intelligence by enabling verbal interaction with machines akin to human conversation. These AI agents have surpassed the Turing Test, achieving confusion rates up to 50%. However, challenges persist, especially with the advent of robots and the need to humanize machines for improved Human-AI collaboration. 
In this experiment, three GPT agents with varying levels of agreeableness (disagreeable, neutral, agreeable) based on the Big Five Inventory were tested in a Turing Test. All exceeded a 50% confusion rate, with the highly agreeable AI agent surpassing 60%. This agent was also recognized as exhibiting the most human-like traits. Various explanations in the literature address why these GPT agents were perceived as human, including psychological frameworks for understanding anthropomorphism. These findings highlight the importance of personality engineering as an emerging discipline in artificial intelligence, calling for collaboration with psychology to develop ergonomic psychological models that enhance system adaptability in collaborative activities. </p> </div> </dd> <dt> <a name='item76'>[76]</a> <a href ="/abs/2411.13751" title="Abstract" id="2411.13751"> arXiv:2411.13751 </a> [<a href="/pdf/2411.13751" title="Download PDF" id="pdf-2411.13751" aria-labelledby="pdf-2411.13751">pdf</a>, <a href="https://arxiv.org/html/2411.13751v1" title="View HTML" id="html-2411.13751" aria-labelledby="html-2411.13751" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13751" title="Other formats" id="oth-2411.13751" aria-labelledby="oth-2411.13751">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ScAlN-on-SiC Ku-Band Solidly-Mounted Bidimensional Mode Resonators </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Colombo,+L">Luca Colombo</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Spagnuolo,+L">Luca Spagnuolo</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Saha,+K">Kapil Saha</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Giribaldi,+G">Gabriel Giribaldi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Simeoni,+P">Pietro Simeoni</a>, <a 
href="https://arxiv.org/search/eess?searchtype=author&query=Rinaldi,+M">Matteo Rinaldi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to IEEE EDL </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> This letter reports on Solidly-Mounted Bidimensional Mode Resonators (S2MRs) based on 30% Scandium-doped Aluminum Nitride (ScAlN) on Silicon Carbide (SiC), operating near 16 GHz. Experimental results demonstrate mechanical quality factors ($Q_m$) as high as 380, electromechanical coupling coefficients ($k_t^2$) of 4.5%, an overall Figure of Merit (FOM = $Q_m \cdot k_t^2$) exceeding 17, and power handling greater than 20 dBm for devices closely matched to 50 ohm. To the best of the authors' knowledge, S2MRs exhibit the highest Key Performance Indicators (KPIs) among solidly mounted resonators in the Ku band, paving the way for the integration of nanoacoustic devices on fast substrates with high-power electronics, tailored for military and harsh environment applications. 
</p> </div> </dd> <dt> <a name='item77'>[77]</a> <a href ="/abs/2411.13753" title="Abstract" id="2411.13753"> arXiv:2411.13753 </a> [<a href="/pdf/2411.13753" title="Download PDF" id="pdf-2411.13753" aria-labelledby="pdf-2411.13753">pdf</a>, <a href="https://arxiv.org/html/2411.13753v1" title="View HTML" id="html-2411.13753" aria-labelledby="html-2411.13753" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13753" title="Other formats" id="oth-2411.13753" aria-labelledby="oth-2411.13753">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FAST-Splat: Fast, Ambiguity-Free Semantics Transfer in Gaussian Splatting </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shorinwa,+O">Ola Shorinwa</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+J">Jiankai Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schwager,+M">Mac Schwager</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> We present FAST-Splat for fast, ambiguity-free semantic Gaussian Splatting, which seeks to address the main limitations of existing semantic Gaussian Splatting methods, namely: slow training and rendering speeds; high memory usage; and ambiguous semantic object localization. In deriving FAST-Splat, we formulate open-vocabulary semantic Gaussian Splatting as the problem of extending closed-set semantic distillation to the open-set (open-vocabulary) setting, enabling FAST-Splat to provide precise semantic object localization results, even when prompted with ambiguous user-provided natural-language queries. 
Further, by exploiting the explicit form of the Gaussian Splatting scene representation to the fullest extent, FAST-Splat retains the remarkable training and rendering speeds of Gaussian Splatting. Specifically, while existing semantic Gaussian Splatting methods distill semantics into a separate neural field or utilize neural models for dimensionality reduction, FAST-Splat directly augments each Gaussian with specific semantic codes, preserving the training, rendering, and memory-usage advantages of Gaussian Splatting over neural field methods. These Gaussian-specific semantic codes, together with a hash-table, enable semantic similarity to be measured with open-vocabulary user prompts and further enable FAST-Splat to respond with unambiguous semantic object labels and 3D masks, unlike prior methods. In experiments, we demonstrate that FAST-Splat is 4x to 6x faster to train with a 13x faster data pre-processing step, achieves between 18x to 75x faster rendering speeds, and requires about 3x smaller GPU memory, compared to the best-competing semantic Gaussian Splatting methods. Further, FAST-Splat achieves relatively similar or better semantic segmentation performance compared to existing methods. After the review period, we will provide links to the project website and the codebase. 
</p> </div> </dd> <dt> <a name='item78'>[78]</a> <a href ="/abs/2411.13754" title="Abstract" id="2411.13754"> arXiv:2411.13754 </a> [<a href="/pdf/2411.13754" title="Download PDF" id="pdf-2411.13754" aria-labelledby="pdf-2411.13754">pdf</a>, <a href="https://arxiv.org/html/2411.13754v1" title="View HTML" id="html-2411.13754" aria-labelledby="html-2411.13754" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13754" title="Other formats" id="oth-2411.13754" aria-labelledby="oth-2411.13754">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning to Reason Iteratively and Parallelly for Complex Visual Reasoning Scenarios </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jaiswal,+S">Shantanu Jaiswal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Roy,+D">Debaditya Roy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fernando,+B">Basura Fernando</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tan,+C">Cheston Tan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> NeurIPS 2024 camera ready; source code to be released at: <a href="https://github.com/shantanuj/IPRM_Iterative_and_Parallel_Reasoning_Mechanism" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Complex visual reasoning and question answering (VQA) is a challenging task that requires compositional multi-step processing and higher-level reasoning capabilities beyond the immediate recognition and localization of objects and events. 
Here, we introduce a fully neural Iterative and Parallel Reasoning Mechanism (IPRM) that combines two distinct forms of computation -- iterative and parallel -- to better address complex VQA scenarios. Specifically, IPRM's "iterative" computation facilitates compositional step-by-step reasoning for scenarios wherein individual operations need to be computed, stored, and recalled dynamically (e.g. when computing the query "determine the color of pen to the left of the child in red t-shirt sitting at the white table"). Meanwhile, its "parallel" computation allows for the simultaneous exploration of different reasoning paths and benefits more robust and efficient execution of operations that are mutually independent (e.g. when counting individual colors for the query: "determine the maximum occurring color amongst all t-shirts"). We design IPRM as a lightweight and fully-differentiable neural module that can be conveniently applied to both transformer and non-transformer vision-language backbones. It notably outperforms prior task-specific methods and transformer-based attention modules across various image and video VQA benchmarks testing distinct complex reasoning capabilities such as compositional spatiotemporal reasoning (AGQA), situational reasoning (STAR), multi-hop reasoning generalization (CLEVR-Humans) and causal event linking (CLEVRER-Humans). Further, IPRM's internal computations can be visualized across reasoning steps, aiding interpretability and diagnosis of its errors. 
</p> </div> </dd> <dt> <a name='item79'>[79]</a> <a href ="/abs/2411.13755" title="Abstract" id="2411.13755"> arXiv:2411.13755 </a> [<a href="/pdf/2411.13755" title="Download PDF" id="pdf-2411.13755" aria-labelledby="pdf-2411.13755">pdf</a>, <a href="https://arxiv.org/html/2411.13755v1" title="View HTML" id="html-2411.13755" aria-labelledby="html-2411.13755" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13755" title="Other formats" id="oth-2411.13755" aria-labelledby="oth-2411.13755">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DKMGP: A Gaussian Process Approach to Multi-Task and Multi-Step Vehicle Dynamics Modeling in Autonomous Racing </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ning,+J">Jingyun Ning</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Behl,+M">Madhur Behl</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 6 figures, 4 tables; submitted to 7th Annual Learning for Dynamics &amp; Control Conference </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Autonomous racing is gaining attention for its potential to advance autonomous vehicle technologies. Accurate race car dynamics modeling is essential for capturing and predicting future states like position, orientation, and velocity. However, accurately modeling complex subsystems such as tires and suspension poses significant challenges. In this paper, we introduce the Deep Kernel-based Multi-task Gaussian Process (DKMGP), which leverages the structure of a variational multi-task and multi-step Gaussian process model enhanced with deep kernel learning for vehicle dynamics modeling. 
Unlike existing single-step methods, DKMGP performs multi-step corrections with an adaptive correction horizon (ACH) algorithm that dynamically adjusts to varying driving conditions. To validate and evaluate the proposed DKMGP method, we compare the model performance with DKL-SKIP and a well-tuned single-track model, using high-speed dynamics data (exceeding 230 km/h) collected from a full-scale Indy race car during the Indy Autonomous Challenge held at the Las Vegas Motor Speedway at CES 2024. The results demonstrate that DKMGP achieves up to 99% prediction accuracy compared to one-step DKL-SKIP, while improving real-time computational efficiency by 1752x. Our results show that DKMGP is a scalable and efficient solution for vehicle dynamics modeling, making it suitable for high-speed autonomous racing control. </p> </div> </dd> <dt> <a name='item80'>[80]</a> <a href ="/abs/2411.13757" title="Abstract" id="2411.13757"> arXiv:2411.13757 </a> [<a href="/pdf/2411.13757" title="Download PDF" id="pdf-2411.13757" aria-labelledby="pdf-2411.13757">pdf</a>, <a href="https://arxiv.org/html/2411.13757v1" title="View HTML" id="html-2411.13757" aria-labelledby="html-2411.13757" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13757" title="Other formats" id="oth-2411.13757" aria-labelledby="oth-2411.13757">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> AttentionBreaker: Adaptive Evolutionary Optimization for Unmasking Vulnerabilities in LLMs through Bit-Flip Attacks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Das,+S">Sanjay Das</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bhattacharya,+S">Swastik Bhattacharya</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kundu,+S">Souvik Kundu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kundu,+S">Shamik Kundu</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Menon,+A">Anand Menon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Raha,+A">Arnab Raha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Basu,+K">Kanad Basu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Large Language Models (LLMs) have revolutionized natural language processing (NLP), excelling in tasks like text generation and summarization. However, their increasing adoption in mission-critical applications raises concerns about hardware-based threats, particularly bit-flip attacks (BFAs). BFAs, enabled by fault injection methods such as Rowhammer, target model parameters in memory, compromising both integrity and performance. Identifying critical parameters for BFAs in the vast parameter space of LLMs poses significant challenges. While prior research suggests transformer-based architectures are inherently more robust to BFAs compared to traditional deep neural networks, we challenge this assumption. For the first time, we demonstrate that as few as three bit-flips can cause catastrophic performance degradation in an LLM with billions of parameters. Current BFA techniques are inadequate for exploiting this vulnerability due to the difficulty of efficiently identifying critical parameters within the immense parameter space. To address this, we propose AttentionBreaker, a novel framework tailored for LLMs that enables efficient traversal of the parameter space to identify critical parameters. Additionally, we introduce GenBFA, an evolutionary optimization strategy designed to refine the search further, isolating the most critical bits for an efficient and effective attack. Empirical results reveal the profound vulnerability of LLMs to AttentionBreaker. 
For example, merely three bit-flips (4.129 x 10^-9% of total parameters) in the LLaMA3-8B-Instruct 8-bit quantized (W8) model result in a complete performance collapse: accuracy on MMLU tasks drops from 67.3% to 0%, and Wikitext perplexity skyrockets from 12.6 to 4.72 x 10^5. These findings underscore the effectiveness of AttentionBreaker in uncovering and exploiting critical vulnerabilities within LLM architectures. </p> </div> </dd> <dt> <a name='item81'>[81]</a> <a href ="/abs/2411.13760" title="Abstract" id="2411.13760"> arXiv:2411.13760 </a> [<a href="/pdf/2411.13760" title="Download PDF" id="pdf-2411.13760" aria-labelledby="pdf-2411.13760">pdf</a>, <a href="https://arxiv.org/html/2411.13760v1" title="View HTML" id="html-2411.13760" aria-labelledby="html-2411.13760" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13760" title="Other formats" id="oth-2411.13760" aria-labelledby="oth-2411.13760">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Framework for Evaluating LLMs Under Task Indeterminacy </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Guerdan,+L">Luke Guerdan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wallach,+H">Hanna Wallach</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barocas,+S">Solon Barocas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chouldechova,+A">Alexandra Chouldechova</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> To Appear in NeurIPS 2024 Workshops on Evaluating Evaluations (EvalEval) and Statistical Foundations of LLMs and Foundation Models (SFLLM) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computation and Language (cs.CL); Human-Computer Interaction (cs.HC) </div> <p class='mathjax'> Large language model 
(LLM) evaluations often assume there is a single correct response -- a gold label -- for each item in the evaluation corpus. However, some tasks can be ambiguous -- i.e., they provide insufficient information to identify a unique interpretation -- or vague -- i.e., they do not clearly indicate where to draw the line when making a determination. Both ambiguity and vagueness can cause task indeterminacy -- the condition where some items in the evaluation corpus have more than one correct response. In this paper, we develop a framework for evaluating LLMs under task indeterminacy. Our framework disentangles the relationships between task specification, human ratings, and LLM responses in the LLM evaluation pipeline. Using our framework, we conduct a synthetic experiment showing that evaluations that use the "gold label" assumption underestimate the true performance. We also provide a method for estimating an error-adjusted performance interval given partial knowledge about indeterminate items in the evaluation corpus. We conclude by outlining implications of our work for the research community. 
</p> </div> </dd> <dt> <a name='item82'>[82]</a> <a href ="/abs/2411.13766" title="Abstract" id="2411.13766"> arXiv:2411.13766 </a> [<a href="/pdf/2411.13766" title="Download PDF" id="pdf-2411.13766" aria-labelledby="pdf-2411.13766">pdf</a>, <a href="https://arxiv.org/html/2411.13766v1" title="View HTML" id="html-2411.13766" aria-labelledby="html-2411.13766" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13766" title="Other formats" id="oth-2411.13766" aria-labelledby="oth-2411.13766">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Tiny-Align: Bridging Automatic Speech Recognition and Large Language Model on the Edge </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Qin,+R">Ruiyang Qin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+D">Dancheng Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+G">Gelei Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yan,+Z">Zheyu Yan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+C">Chenhui Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+Y">Yuting Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+X+S">X. 
Sharon Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiong,+J">Jinjun Xiong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+Y">Yiyu Shi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> The combination of Large Language Models (LLM) and Automatic Speech Recognition (ASR), when deployed on edge devices (called edge ASR-LLM), can serve as a powerful personalized assistant to enable audio-based interaction for users. Compared to text-based interaction, edge ASR-LLM allows accessible and natural audio interactions. Unfortunately, existing ASR-LLM models are mainly trained in high-performance computing environments and produce substantial model weights, making them difficult to deploy on edge devices. More importantly, to better serve users' personalized needs, the ASR-LLM must be able to learn from each distinct user, given that audio input often contains highly personalized characteristics that necessitate personalized on-device training. Since individually fine-tuning the ASR or LLM often leads to suboptimal results due to modality-specific limitations, end-to-end training ensures seamless integration of audio features and language understanding (cross-modal alignment), ultimately enabling a more personalized and efficient adaptation on edge devices. However, due to the complex training requirements and substantial computational demands of existing approaches, cross-modal alignment between ASR audio and LLM can be challenging on edge devices. In this work, we propose a resource-efficient cross-modal alignment framework that bridges ASR and LLMs on edge devices to handle personalized audio input. 
Our framework enables efficient ASR-LLM alignment on resource-constrained devices like NVIDIA Jetson Orin (8GB RAM), achieving 50x training time speedup while improving the alignment quality by more than 50%. To the best of our knowledge, this is the first work to study efficient ASR-LLM alignment on resource-constrained edge devices. </p> </div> </dd> <dt> <a name='item83'>[83]</a> <a href ="/abs/2411.13768" title="Abstract" id="2411.13768"> arXiv:2411.13768 </a> [<a href="/pdf/2411.13768" title="Download PDF" id="pdf-2411.13768" aria-labelledby="pdf-2411.13768">pdf</a>, <a href="https://arxiv.org/html/2411.13768v1" title="View HTML" id="html-2411.13768" aria-labelledby="html-2411.13768" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13768" title="Other formats" id="oth-2411.13768" aria-labelledby="oth-2411.13768">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> An Evaluation-Driven Approach to Designing LLM Agents: Process and Architecture </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xia,+B">Boming Xia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+Q">Qinghua Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+L">Liming Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xing,+Z">Zhenchang Xing</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+D">Dehai Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+H">Hao Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The advent of Large Language Models (LLMs) has enabled the development of LLM agents capable of autonomously achieving under-specified goals and continuously evolving through post-deployment improvement, 
sometimes without requiring code or model updates. Conventional approaches, such as pre-defined test cases and code/model redevelopment pipelines, are inadequate for addressing the unique challenges of LLM agent development, particularly in terms of quality and risk control. This paper introduces an evaluation-driven design approach, inspired by test-driven development, to address these challenges. Through a multivocal literature review (MLR), we synthesize existing LLM evaluation methods and propose a novel process model and reference architecture specifically designed for LLM agents. The proposed approach integrates online and offline evaluations to support adaptive runtime adjustments and systematic offline redevelopment, improving runtime pipelines, artifacts, system architecture, and LLMs by continuously incorporating evaluation results, including fine-grained feedback from human and AI evaluators. </p> </div> </dd> <dt> <a name='item84'>[84]</a> <a href ="/abs/2411.13770" title="Abstract" id="2411.13770"> arXiv:2411.13770 </a> [<a href="/pdf/2411.13770" title="Download PDF" id="pdf-2411.13770" aria-labelledby="pdf-2411.13770">pdf</a>, <a href="https://arxiv.org/html/2411.13770v1" title="View HTML" id="html-2411.13770" aria-labelledby="html-2411.13770" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13770" title="Other formats" id="oth-2411.13770" aria-labelledby="oth-2411.13770">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Novel Passive Occupational Shoulder Exoskeleton With Adjustable Peak Assistive Torque Angle For Overhead Tasks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tian,+J">Jin Tian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+H">Haiqi Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+C">Changjia Lu</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+C">Chifu Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yingjie Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+B">Baichun Wei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yi,+C">Chunzhi Yi</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> IEEE Transactions on Biomedical Engineering,2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Objective: Overhead tasks are a primary inducement to work-related musculoskeletal disorders. Aiming to reduce shoulder physical loads, passive shoulder exoskeletons are increasingly prevalent in the industry due to their lightweight, affordability, and effectiveness. However, they can only handle specific tasks and struggle to balance compactness with a sufficient range of motion effectively. Method: We proposed a novel passive occupational shoulder exoskeleton designed to handle various overhead tasks at different arm elevation angles, ensuring sufficient ROM while maintaining compactness. By formulating kinematic models and simulations, an ergonomic shoulder structure was developed. Then, we presented a torque generator equipped with an adjustable peak assistive torque angle to switch between low and high assistance phases through a passive clutch mechanism. Ten healthy participants were recruited to validate its functionality by performing the screwing task. Results: Measured range of motion results demonstrated that the exoskeleton can ensure a sufficient ROM in both sagittal (164$^\circ$) and horizontal (158$^\circ$) flexion/extension movements. 
The experimental results of the screwing task showed that the exoskeleton could reduce muscle activation (up to 49.6%), perceived effort and frustration, and provide an improved user experience (scored 79.7 out of 100). Conclusion: These results indicate that the proposed exoskeleton can guarantee natural movements and provide efficient assistance during overhead work, and thus have the potential to reduce the risk of musculoskeletal disorders. Significance: The proposed exoskeleton provides insights into multi-task adaptability and efficient assistance, highlighting the potential for expanding the application of exoskeletons. </p> </div> </dd> <dt> <a name='item85'>[85]</a> <a href ="/abs/2411.13771" title="Abstract" id="2411.13771"> arXiv:2411.13771 </a> [<a href="/pdf/2411.13771" title="Download PDF" id="pdf-2411.13771" aria-labelledby="pdf-2411.13771">pdf</a>, <a href="/format/2411.13771" title="Other formats" id="oth-2411.13771" aria-labelledby="oth-2411.13771">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Deciphering Urban Morphogenesis: A Morphospace Approach </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Netto,+V">Vini Netto</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cacholas,+C">Caio Cacholas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Daems,+D">Dries Daems</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ribeiro,+F">Fabiano Ribeiro</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Davis,+H">Howard Davis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lenz,+D">Daniel Lenz</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 20 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computers and Society (cs.CY)</span>; Social and Information Networks 
(cs.SI); Physics and Society (physics.soc-ph) </div> <p class='mathjax'> Cities emerged independently across different world regions and historical periods, raising fundamental questions: How did the first urban settlements develop? What social and spatial conditions enabled their emergence? Are these processes universal or context-dependent? Moreover, what distinguishes cities from other human settlements? This paper investigates the drivers of city creation through a hybrid approach that integrates urban theory with the biological concept of morphospace (the space of all possible configurations) and archaeological evidence. It examines the transition from sedentary hunter-gatherer communities to urban societies, identifying key forces such as defence, social hierarchy formation, population scale, and work specialization, culminating in increasingly complex divisions of labour as a central driver of urbanization. Morphogenesis is conceptualised as a trajectory across morphospace, shaped by structure-seeking selection processes that balance density, permeability, and information as critical dimensions. The study highlights the non-ergodic nature of urban morphogenesis, where configurations are progressively selected based on their fitness to support the diversifying interactions between mutually dependent agents. The morphospace framework effectively distinguishes between theoretical spatial configurations, non-urban and proto-urban settlements, and contemporary cities. This analysis supports the proposition that cities emerge and evolve as solutions balancing density, permeability, and informational organization, enabling them to support increasingly complex societal functions. 
</p> </div> </dd> <dt> <a name='item86'>[86]</a> <a href ="/abs/2411.13772" title="Abstract" id="2411.13772"> arXiv:2411.13772 </a> [<a href="/pdf/2411.13772" title="Download PDF" id="pdf-2411.13772" aria-labelledby="pdf-2411.13772">pdf</a>, <a href="https://arxiv.org/html/2411.13772v1" title="View HTML" id="html-2411.13772" aria-labelledby="html-2411.13772" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13772" title="Other formats" id="oth-2411.13772" aria-labelledby="oth-2411.13772">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Characteristic Mapping Method with Source Terms: Applications to Ideal Magnetohydrodynamics </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Yin,+X">Xi-Yuan Yin</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Krah,+P">Philipp Krah</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Nave,+J">Jean-Christophe Nave</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Schneider,+K">Kai Schneider</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> The preprint has not been revised yet! </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span>; Computational Physics (physics.comp-ph); Fluid Dynamics (physics.flu-dyn); Plasma Physics (physics.plasm-ph) </div> <p class='mathjax'> This work introduces a generalized characteristic mapping method designed to handle non-linear advection with source terms. The semi-Lagrangian approach advances the flow map, incorporating the source term via the Duhamel integral. We derive a recursive formula for the time decomposition of the map and the source term integral, enhancing computational efficiency. 
Benchmark computations are presented for a test case with an exact solution and for two-dimensional ideal incompressible magnetohydrodynamics (MHD). Results demonstrate third-order accuracy in both space and time. The submap decomposition method achieves exceptionally high resolution, as illustrated by zooming into fine-scale current sheets. An error estimate is performed and suggests third order convergence in space and time. </p> </div> </dd> <dt> <a name='item87'>[87]</a> <a href ="/abs/2411.13773" title="Abstract" id="2411.13773"> arXiv:2411.13773 </a> [<a href="/pdf/2411.13773" title="Download PDF" id="pdf-2411.13773" aria-labelledby="pdf-2411.13773">pdf</a>, <a href="https://arxiv.org/html/2411.13773v1" title="View HTML" id="html-2411.13773" aria-labelledby="html-2411.13773" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13773" title="Other formats" id="oth-2411.13773" aria-labelledby="oth-2411.13773">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FastRAG: Retrieval Augmented Generation for Semi-structured Data </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Abane,+A">Amar Abane</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bekri,+A">Anis Bekri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Battou,+A">Abdella Battou</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Networking and Internet Architecture (cs.NI)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Efficiently processing and interpreting network data is critical for the operation of increasingly complex networks. Recent advances in Large Language Models (LLM) and Retrieval-Augmented Generation (RAG) techniques have improved data processing in network management. 
However, existing RAG methods like VectorRAG and GraphRAG struggle with the complexity and implicit nature of semi-structured technical data, leading to inefficiencies in time, cost, and retrieval. This paper introduces FastRAG, a novel RAG approach designed for semi-structured data. FastRAG employs schema learning and script learning to extract and structure data without needing to submit entire data sources to an LLM. It integrates text search with knowledge graph (KG) querying to improve accuracy in retrieving context-rich information. Evaluation results demonstrate that FastRAG provides accurate question answering, while improving up to 90% in time and 85% in cost compared to GraphRAG. </p> </div> </dd> <dt> <a name='item88'>[88]</a> <a href ="/abs/2411.13774" title="Abstract" id="2411.13774"> arXiv:2411.13774 </a> [<a href="/pdf/2411.13774" title="Download PDF" id="pdf-2411.13774" aria-labelledby="pdf-2411.13774">pdf</a>, <a href="https://arxiv.org/html/2411.13774v1" title="View HTML" id="html-2411.13774" aria-labelledby="html-2411.13774" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13774" title="Other formats" id="oth-2411.13774" aria-labelledby="oth-2411.13774">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Segment Any Class (SAC): Multi-Class Few-Shot Semantic Segmentation via Class Region Proposals </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zakir,+H+M">Hussni Mohd Zakir</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ho,+E+T+W">Eric Tatt Wei Ho</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 2 figures, 3 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> The Segment-Anything Model (SAM) is a vision 
foundation model for segmentation with a prompt-driven framework. SAM generates class-agnostic masks based on user-specified instance-referring prompts. However, adapting SAM for automated segmentation -- where manual input is absent -- of specific object classes often requires additional model training. We present Segment Any Class (SAC), a novel, training-free approach that task-adapts SAM for Multi-class segmentation. SAC generates Class-Region Proposals (CRP) on query images which allows us to automatically generate class-aware prompts on probable locations of class instances. CRPs are derived from elementary intra-class and inter-class feature distinctions without any additional training. Our method is versatile, accommodating any N-way K-shot configurations for the multi-class few-shot semantic segmentation (FSS) task. Unlike gradient-learning adaptation of generalist models which risk the loss of generalization and potentially suffer from catastrophic forgetting, SAC solely utilizes automated prompting and achieves superior results over state-of-the-art methods on the COCO-20i benchmark, particularly excelling in high N-way class scenarios. SAC is an interesting demonstration of a prompt-only approach to adapting foundation models for novel tasks with small, limited datasets without any modifications to the foundation model itself. This method offers interesting benefits such as intrinsic immunity to concept or feature loss and rapid, online task adaptation of foundation models. 
</p> </div> </dd> <dt> <a name='item89'>[89]</a> <a href ="/abs/2411.13775" title="Abstract" id="2411.13775"> arXiv:2411.13775 </a> [<a href="/pdf/2411.13775" title="Download PDF" id="pdf-2411.13775" aria-labelledby="pdf-2411.13775">pdf</a>, <a href="https://arxiv.org/html/2411.13775v1" title="View HTML" id="html-2411.13775" aria-labelledby="html-2411.13775" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13775" title="Other formats" id="oth-2411.13775" aria-labelledby="oth-2411.13775">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Benchmarking GPT-4 against Human Translators: A Comprehensive Evaluation Across Languages, Domains, and Expertise Levels </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yan,+J">Jianhao Yan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yan,+P">Pingchuan Yan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yulong Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+J">Jing Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+X">Xianchao Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yue Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Work in progress </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> This study presents a comprehensive evaluation of GPT-4's translation capabilities compared to human translators of varying expertise levels. 
Through systematic human evaluation using the MQM schema, we assess translations across three language pairs (Chinese$\longleftrightarrow$English, Russian$\longleftrightarrow$English, and Chinese$\longleftrightarrow$Hindi) and three domains (News, Technology, and Biomedical). Our findings reveal that GPT-4 achieves performance comparable to junior-level translators in terms of total errors, while still lagging behind senior translators. Unlike traditional Neural Machine Translation systems, which show significant performance degradation in resource-poor language directions, GPT-4 maintains consistent translation quality across all evaluated language pairs. Through qualitative analysis, we identify distinctive patterns in translation approaches: GPT-4 tends toward overly literal translations and exhibits lexical inconsistency, while human translators sometimes over-interpret context and introduce hallucinations. This study represents the first systematic comparison between LLM and human translators across different proficiency levels, providing valuable insights into the current capabilities and limitations of LLM-based translation systems. </p> </div> </dd> <dt> <a name='item90'>[90]</a> <a href ="/abs/2411.13777" title="Abstract" id="2411.13777"> arXiv:2411.13777 </a> [<a href="/pdf/2411.13777" title="Download PDF" id="pdf-2411.13777" aria-labelledby="pdf-2411.13777">pdf</a>, <a href="https://arxiv.org/html/2411.13777v1" title="View HTML" id="html-2411.13777" aria-labelledby="html-2411.13777" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13777" title="Other formats" id="oth-2411.13777" aria-labelledby="oth-2411.13777">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Evidence is All We Need: Do Self-Admitted Technical Debts Impact Method-Level Maintenance? 
</div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chowdhury,+S">Shaiful Chowdhury</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kidwai,+H">Hisham Kidwai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Asaduzzaman,+M">Muhammad Asaduzzaman</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span> </div> <p class='mathjax'> Self-Admitted Technical Debt (SATD) refers to the phenomenon where developers explicitly acknowledge technical debt through comments in the source code. While considerable research has focused on detecting and addressing SATD, its true impact on software maintenance remains underexplored. The few studies that have examined this critical aspect have not provided concrete evidence linking SATD to negative effects on software maintenance. These studies, however, focused only on file- or class-level code granularity. This paper aims to empirically investigate the influence of SATD on various facets of software maintenance at the method level. We assess SATD's effects on code quality, bug susceptibility, change frequency, and the time practitioners typically take to resolve SATD. <br>By analyzing a dataset of 774,051 methods from 49 open-source projects, we discovered that methods containing SATD are not only larger and more complex but also exhibit lower readability and a higher tendency for bugs and changes. We also found that SATD often remains unresolved for extended periods, adversely affecting code quality and maintainability. Our results provide empirical evidence highlighting the necessity of early identification, resource allocation, and proactive management of SATD to mitigate its long-term impacts on software quality and maintenance costs. 
</p> </div> </dd> <dt> <a name='item91'>[91]</a> <a href ="/abs/2411.13778" title="Abstract" id="2411.13778"> arXiv:2411.13778 </a> [<a href="/pdf/2411.13778" title="Download PDF" id="pdf-2411.13778" aria-labelledby="pdf-2411.13778">pdf</a>, <a href="https://arxiv.org/html/2411.13778v1" title="View HTML" id="html-2411.13778" aria-labelledby="html-2411.13778" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13778" title="Other formats" id="oth-2411.13778" aria-labelledby="oth-2411.13778">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Survey on Adversarial Robustness of LiDAR-based Machine Learning Perception in Autonomous Vehicles </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+J">Junae Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kaur,+A">Amardeep Kaur</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 20 pages, 2 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Cryptography and Security (cs.CR) </div> <p class='mathjax'> In autonomous driving, the combination of AI and vehicular technology offers great potential. However, this amalgamation comes with vulnerabilities to adversarial attacks. This survey focuses on the intersection of Adversarial Machine Learning (AML) and autonomous systems, with a specific focus on LiDAR-based systems. We comprehensively explore the threat landscape, encompassing cyber-attacks on sensors and adversarial perturbations. Additionally, we investigate defensive strategies employed in countering these threats. 
This paper endeavors to present a concise overview of the challenges and advances in securing autonomous driving systems against adversarial threats, emphasizing the need for robust defenses to ensure safety and security. </p> </div> </dd> <dt> <a name='item92'>[92]</a> <a href ="/abs/2411.13779" title="Abstract" id="2411.13779"> arXiv:2411.13779 </a> [<a href="/pdf/2411.13779" title="Download PDF" id="pdf-2411.13779" aria-labelledby="pdf-2411.13779">pdf</a>, <a href="https://arxiv.org/html/2411.13779v1" title="View HTML" id="html-2411.13779" aria-labelledby="html-2411.13779" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13779" title="Other formats" id="oth-2411.13779" aria-labelledby="oth-2411.13779">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> NewsInterview: a Dataset and a Playground to Evaluate LLMs' Ground Gap via Informational Interviews </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+M">Michael Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cho,+H+J">Hyundong Justin Cho</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+W">Weiyan Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=May,+J">Jonathan May</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Spangher,+A">Alexander Spangher</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Large Language Models (LLMs) have demonstrated impressive capabilities in generating coherent text but often struggle with grounding language and strategic dialogue. To address this gap, we focus on journalistic interviews, a domain rich in grounding communication and abundant in data. 
We curate a dataset of 40,000 two-person informational interviews from NPR and CNN, and reveal that LLMs are significantly less likely than human interviewers to use acknowledgements and to pivot to higher-level questions. Realizing that a fundamental deficit exists in multi-turn planning and strategic thinking, we develop a realistic simulated environment, incorporating source personas and persuasive elements, in order to facilitate the development of agents with longer-horizon rewards. Our experiments show that while source LLMs mimic human behavior in information sharing, interviewer LLMs struggle with recognizing when questions are answered and engaging persuasively, leading to suboptimal information extraction across model size and capability. These findings underscore the need for enhancing LLMs' strategic dialogue capabilities. </p> </div> </dd> <dt> <a name='item93'>[93]</a> <a href ="/abs/2411.13784" title="Abstract" id="2411.13784"> arXiv:2411.13784 </a> [<a href="/pdf/2411.13784" title="Download PDF" id="pdf-2411.13784" aria-labelledby="pdf-2411.13784">pdf</a>, <a href="https://arxiv.org/html/2411.13784v1" title="View HTML" id="html-2411.13784" aria-labelledby="html-2411.13784" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13784" title="Other formats" id="oth-2411.13784" aria-labelledby="oth-2411.13784">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> $d_X$-Privacy for Text and the Curse of Dimensionality </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Asghar,+H+J">Hassan Jameel Asghar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Carpentier,+R">Robin Carpentier</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+B+Z+H">Benjamin Zi Hao Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kaafar,+D">Dali Kaafar</a></div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span> </div> <p class='mathjax'> A widely used method to ensure privacy of unstructured text data is the multidimensional Laplace mechanism for $d_X$-privacy, which is a relaxation of differential privacy for metric spaces. We identify an intriguing peculiarity of this mechanism. When applied on a word-by-word basis, the mechanism either outputs the original word, or completely dissimilar words, and very rarely any semantically similar words. We investigate this observation in detail, and tie it to the fact that the distance of the nearest neighbor of a word in any word embedding model (which are high-dimensional) is much larger than the relative difference in distances to any of its two consecutive neighbors. We also show that the dot product of the multidimensional Laplace noise vector with any word embedding plays a crucial role in designating the nearest neighbor. We derive the distribution, moments and tail bounds of this dot product. We further propose a fix as a post-processing step, which satisfactorily removes the above-mentioned issue. 
</p> </div> </dd> <dt> <a name='item94'>[94]</a> <a href ="/abs/2411.13785" title="Abstract" id="2411.13785"> arXiv:2411.13785 </a> [<a href="/pdf/2411.13785" title="Download PDF" id="pdf-2411.13785" aria-labelledby="pdf-2411.13785">pdf</a>, <a href="https://arxiv.org/html/2411.13785v1" title="View HTML" id="html-2411.13785" aria-labelledby="html-2411.13785" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13785" title="Other formats" id="oth-2411.13785" aria-labelledby="oth-2411.13785">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Throughput Maximization for Movable Antenna Systems with Movement Delay Consideration </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Honghao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Q">Qingqing Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+Y">Ying Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+W">Wen Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mei,+W">Weidong Mei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+G">Guojie Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+L">Lexi Xu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> In this paper, we model the minimum achievable throughput within a transmission block of restricted duration and aim to maximize it in movable antenna (MA)-enabled multiuser downlink communications. Particularly, we account for the antenna moving delay caused by mechanical movement, which has not been fully considered in previous studies, and reveal the trade-off between the delay and signal-to-interference-plus-noise ratio at users. 
To this end, we first consider a single-user setup to analyze the necessity of antenna movement. By quantizing the virtual angles of arrival, we derive the requisite region size for antenna moving, design the initial MA position, and elucidate the relationship between quantization resolution and moving region size. Furthermore, an efficient algorithm is developed to optimize MA position via successive convex approximation, which is subsequently extended to the general multiuser setup. Numerical results demonstrate that the proposed algorithms outperform fixed-position antenna schemes and existing ones without consideration of movement delay. Additionally, our algorithms exhibit excellent adaptability and stability across various transmission block durations and moving region sizes, and are robust to different antenna moving speeds. This allows the hardware cost of MA-aided systems to be reduced by employing low rotational speed motors. </p> </div> </dd> <dt> <a name='item95'>[95]</a> <a href ="/abs/2411.13786" title="Abstract" id="2411.13786"> arXiv:2411.13786 </a> [<a href="/pdf/2411.13786" title="Download PDF" id="pdf-2411.13786" aria-labelledby="pdf-2411.13786">pdf</a>, <a href="https://arxiv.org/html/2411.13786v1" title="View HTML" id="html-2411.13786" aria-labelledby="html-2411.13786" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13786" title="Other formats" id="oth-2411.13786" aria-labelledby="oth-2411.13786">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Adaptable Embeddings Network (AEN) </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Loosmore,+S">Stan Loosmore</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Titus,+A">Alexander Titus</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 20 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> 
<span class="primary-subject">Machine Learning (cs.LG)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> Modern day Language Models see extensive use in text classification, yet this comes at significant computational cost. Compute-effective classification models are needed for low-resource environments, most notably on edge devices. We introduce Adaptable Embeddings Networks (AEN), a novel dual-encoder architecture using Kernel Density Estimation (KDE). This architecture allows for runtime adaptation of classification criteria without retraining and is non-autoregressive. Through thorough synthetic data experimentation, we demonstrate our model outputs comparable and in certain cases superior results to that of autoregressive models an order of magnitude larger than AEN's size. The architecture's ability to preprocess and cache condition embeddings makes it ideal for edge computing applications and real-time monitoring systems. </p> </div> </dd> <dt> <a name='item96'>[96]</a> <a href ="/abs/2411.13787" title="Abstract" id="2411.13787"> arXiv:2411.13787 </a> [<a href="/pdf/2411.13787" title="Download PDF" id="pdf-2411.13787" aria-labelledby="pdf-2411.13787">pdf</a>, <a href="https://arxiv.org/html/2411.13787v1" title="View HTML" id="html-2411.13787" aria-labelledby="html-2411.13787" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13787" title="Other formats" id="oth-2411.13787" aria-labelledby="oth-2411.13787">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Edge-Cloud Routing for Text-to-Image Model with Token-Level Multi-Metric Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xin,+Z">Zewei Xin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Q">Qinya Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Niu,+C">Chaoyue Niu</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+F">Fan Wu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Large text-to-image models demonstrate impressive generation capabilities; however, their substantial size necessitates expensive cloud servers for deployment. Conversely, light-weight models can be deployed on edge devices at lower cost but often with inferior generation quality for complex user prompts. To strike a balance between performance and cost, we propose a routing framework, called RouteT2I, which dynamically selects either the large cloud model or the light-weight edge model for each user prompt. Since generated image quality is challenging to measure directly, RouteT2I establishes multi-dimensional quality metrics, particularly, by evaluating the similarity between the generated images and both positive and negative texts that describe each specific quality metric. RouteT2I then predicts the expected quality of the generated images by identifying key tokens in the prompt and comparing their impact on the quality. RouteT2I further introduces the Pareto relative superiority to compare the multi-metric quality of the generated images. Based on this comparison and predefined cost constraints, RouteT2I allocates prompts to either the edge or the cloud. Evaluation reveals that RouteT2I significantly reduces the number of requests to the large cloud model while maintaining high-quality image generation. 
</p> </div> </dd> <dt> <a name='item97'>[97]</a> <a href ="/abs/2411.13789" title="Abstract" id="2411.13789"> arXiv:2411.13789 </a> [<a href="/pdf/2411.13789" title="Download PDF" id="pdf-2411.13789" aria-labelledby="pdf-2411.13789">pdf</a>, <a href="https://arxiv.org/html/2411.13789v1" title="View HTML" id="html-2411.13789" aria-labelledby="html-2411.13789" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13789" title="Other formats" id="oth-2411.13789" aria-labelledby="oth-2411.13789">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> LEADRE: Multi-Faceted Knowledge Enhanced LLM Empowered Display Advertisement Recommender System </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+F">Fengxin Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yi Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yue Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+C">Chao Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yuan Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+X">Xiaoxiang Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xue,+W">Wei Xue</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+D">Dapeng Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiao,+L">Lei Xiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gu,+H">Haijie Gu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+J">Jie Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+H">Hongyan Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qin,+B">Biao Qin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+J">Jun He</a></div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Information Retrieval (cs.IR)</span> </div> <p class='mathjax'> Display advertising provides significant value to advertisers, publishers, and users. Traditional display advertising systems utilize a multi-stage architecture consisting of retrieval, coarse ranking, and final ranking. However, conventional retrieval methods rely on ID-based learning to rank mechanisms and fail to adequately utilize the content information of ads, which hampers their ability to provide diverse recommendation lists. <br>To address this limitation, we propose leveraging the extensive world knowledge of LLMs. However, three key challenges arise when attempting to maximize the effectiveness of LLMs: "How to capture user interests", "How to bridge the knowledge gap between LLMs and advertising system", and "How to efficiently deploy LLMs". To overcome these challenges, we introduce a novel LLM-based framework called LLM Empowered Display ADvertisement REcommender system (LEADRE). LEADRE consists of three core modules: (1) The Intent-Aware Prompt Engineering introduces multi-faceted knowledge and designs intent-aware &lt;Prompt, Response&gt; pairs that fine-tune LLMs to generate ads tailored to users' personal interests. (2) The Advertising-Specific Knowledge Alignment incorporates auxiliary fine-tuning tasks and Direct Preference Optimization (DPO) to align LLMs with ad semantic and business value. (3) The Efficient System Deployment deploys LEADRE in an online environment by integrating both latency-tolerant and latency-sensitive service. Extensive offline experiments demonstrate the effectiveness of LEADRE and validate the contributions of individual modules. Online A/B test shows that LEADRE leads to a 1.57% and 1.17% GMV lift for serviced users on WeChat Channels and Moments separately. LEADRE has been deployed on both platforms, serving tens of billions of requests each day. 
</p> </div> </dd> <dt> <a name='item98'>[98]</a> <a href ="/abs/2411.13794" title="Abstract" id="2411.13794"> arXiv:2411.13794 </a> [<a href="/pdf/2411.13794" title="Download PDF" id="pdf-2411.13794" aria-labelledby="pdf-2411.13794">pdf</a>, <a href="https://arxiv.org/html/2411.13794v1" title="View HTML" id="html-2411.13794" aria-labelledby="html-2411.13794" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13794" title="Other formats" id="oth-2411.13794" aria-labelledby="oth-2411.13794">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> GalaxyEdit: Large-Scale Image Editing Dataset with Enhanced Diffusion Adapter </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bala,+A">Aniruddha Bala</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jaiswal,+R">Rohan Jaiswal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rashid,+L">Loay Rashid</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Roheda,+S">Siddharth Roheda</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages, 6 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Training of large-scale text-to-image and image-to-image models requires a huge amount of annotated data. While text-to-image datasets are abundant, data available for instruction-based image-to-image tasks like object addition and removal is limited. This is because of the several challenges associated with the data generation process, such as, significant human effort, limited automation, suboptimal end-to-end models, data diversity constraints and high expenses. 
We propose an automated data generation pipeline aimed at alleviating such limitations, and introduce GalaxyEdit - a large-scale image editing dataset for add and remove operations. We fine-tune the SD v1.5 model on our dataset and find that our model can successfully handle a broader range of objects and complex editing instructions, outperforming state-of-the-art methods in FID scores by 11.2% and 26.1% for add and remove tasks respectively. Furthermore, in light of on-device usage scenarios, we expand our research to include task-specific lightweight adapters leveraging the ControlNet-xs architecture. While ControlNet-xs excels in canny and depth guided generation, we propose to improve the communication between the control network and U-Net for more intricate add and remove tasks. We achieve this by enhancing ControlNet-xs with non-linear interaction layers based on Volterra filters. Our approach outperforms ControlNet-xs in both add/remove and canny-guided image generation tasks, highlighting the effectiveness of the proposed enhancement. 
</p> </div> </dd> <dt> <a name='item99'>[99]</a> <a href ="/abs/2411.13797" title="Abstract" id="2411.13797"> arXiv:2411.13797 </a> [<a href="/pdf/2411.13797" title="Download PDF" id="pdf-2411.13797" aria-labelledby="pdf-2411.13797">pdf</a>, <a href="https://arxiv.org/html/2411.13797v1" title="View HTML" id="html-2411.13797" aria-labelledby="html-2411.13797" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13797" title="Other formats" id="oth-2411.13797" aria-labelledby="oth-2411.13797">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hugging Rain Man: A Novel Facial Action Units Dataset for Analyzing Atypical Facial Expressions in Children with Autism Spectrum Disorder </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ji,+Y">Yanfeng Ji</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+S">Shutong Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+R">Ruyi Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jingying Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+X">Xinzhou Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+Z">Zhengyu Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Quan,+Y">Yuxuan Quan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+J">Junpeng Liu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Portions of the dataset, features, and pretrained models are accessible at: <a href="https://github.com/Jonas-DL/Hugging-Rain-Man" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Children with Autism 
Spectrum Disorder (ASD) often exhibit atypical facial expressions. However, the specific objective facial features that underlie this subjective perception remain unclear. In this paper, we introduce a novel dataset, Hugging Rain Man (HRM), which includes facial action units (AUs) manually annotated by FACS experts for both children with ASD and typical development (TD). The dataset comprises a rich collection of posed and spontaneous facial expressions, totaling approximately 130,000 frames, along with 22 AUs, 10 Action Descriptors (ADs), and atypicality ratings. A statistical analysis of static images from the HRM reveals significant differences between the ASD and TD groups across multiple AUs and ADs when displaying the same emotional expressions, confirming that participants with ASD tend to demonstrate more irregular and diverse expression patterns. Subsequently, a temporal regression method was presented to analyze atypicality of dynamic sequences, thereby bridging the gap between subjective perception and objective facial characteristics. Furthermore, baseline results for AU detection are provided for future research reference. This work not only contributes to our understanding of the unique facial expression characteristics associated with ASD but also provides potential tools for ASD early screening. Portions of the dataset, features, and pretrained models are accessible at: <a href="https://github.com/Jonas-DL/Hugging-Rain-Man" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item100'>[100]</a> <a href ="/abs/2411.13799" title="Abstract" id="2411.13799"> arXiv:2411.13799 </a> [<a href="/pdf/2411.13799" title="Download PDF" id="pdf-2411.13799" aria-labelledby="pdf-2411.13799">pdf</a>, <a href="https://arxiv.org/html/2411.13799v1" title="View HTML" id="html-2411.13799" aria-labelledby="html-2411.13799" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13799" title="Other formats" id="oth-2411.13799" aria-labelledby="oth-2411.13799">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unconsidered Installations: Discovering IoT Deployments in the IPv6 Internet </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Dahlmanns,+M">Markus Dahlmanns</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Heidenreich,+F">Felix Heidenreich</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lohm%C3%B6ller,+J">Johannes Lohmöller</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pennekamp,+J">Jan Pennekamp</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wehrle,+K">Klaus Wehrle</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Henze,+M">Martin Henze</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 2 figures </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> In Proceedings of the 2024 IEEE/IFIP Network Operations and Management Symposium (NOMS '24), May 6-10, 2024, Seoul, Korea </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Networking and Internet Architecture (cs.NI)</span> </div> <p class='mathjax'> Internet-wide studies provide extremely valuable insight into how operators manage their Internet of Things (IoT) deployments in reality and often reveal grievances, e.g., significant security 
issues. However, while IoT devices often use IPv6, past studies resorted to comprehensively scanning the IPv4 address space. To fully understand how the IoT and all its services and devices are operated, including IPv6-reachable deployments is inevitable—although scanning the entire IPv6 address space is infeasible. In this paper, we close this gap and examine how to best discover IPv6-reachable IoT deployments. To this end, we propose a methodology that allows combining various IPv6 scan direction approaches to understand the findability and prevalence of IPv6-reachable IoT deployments. Using three sources of active IPv6 addresses and eleven address generators, we discovered 6658 IoT deployments. We derive that the available address sources are a good starting point for finding IoT deployments. Additionally, we show that using two address generators is sufficient to cover most found deployments and save time as well as resources. Assessing the security of the deployments, we surprisingly find similar issues as in the IPv4 Internet, although IPv6 deployments might be newer and generally more up-to-date: Only 39% of deployments have access control in place and only 6.2% make use of TLS inviting attackers, e.g., to eavesdrop sensitive data. 
</p> </div> </dd> <dt> <a name='item101'>[101]</a> <a href ="/abs/2411.13800" title="Abstract" id="2411.13800"> arXiv:2411.13800 </a> [<a href="/pdf/2411.13800" title="Download PDF" id="pdf-2411.13800" aria-labelledby="pdf-2411.13800">pdf</a>, <a href="https://arxiv.org/html/2411.13800v1" title="View HTML" id="html-2411.13800" aria-labelledby="html-2411.13800" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13800" title="Other formats" id="oth-2411.13800" aria-labelledby="oth-2411.13800">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Explaining GPT-4's Schema of Depression Using Machine Behavior Analysis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ganesan,+A+V">Adithya V Ganesan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Varadarajan,+V">Vasudha Varadarajan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lal,+Y+K">Yash Kumar Lal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Eijsbroek,+V+C">Veerle C. Eijsbroek</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kjell,+K">Katarina Kjell</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kjell,+O+N">Oscar N.E. Kjell</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dhanasekaran,+T">Tanuja Dhanasekaran</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stade,+E+C">Elizabeth C. Stade</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Eichstaedt,+J+C">Johannes C. Eichstaedt</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Boyd,+R+L">Ryan L. Boyd</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schwartz,+H+A">H. 
Andrew Schwartz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Flek,+L">Lucie Flek</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 21 pages, 3 tables, 6 figures, 1 supplementary table, 83 references </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> Use of large language models such as ChatGPT (GPT-4) for mental health support has grown rapidly, emerging as a promising route to assess and help people with mood disorders, like depression. However, we have a limited understanding of GPT-4's schema of mental disorders, that is, how it internally associates and interprets symptoms. In this work, we leveraged contemporary measurement theory to decode how GPT-4 interrelates depressive symptoms to inform both clinical utility and theoretical understanding. We found GPT-4's assessment of depression: (a) had high overall convergent validity (r = .71 with self-report on 955 samples, and r = .81 with expert judgments on 209 samples); (b) had moderately high internal consistency (symptom inter-correlates r = .23 to .78) that largely aligned with literature and self-report; except that GPT-4 (c) underemphasized suicidality's -- and overemphasized psychomotor's -- relationship with other symptoms, and (d) had symptom inference patterns that suggest nuanced hypotheses (e.g. sleep and fatigue are influenced by most other symptoms while feelings of worthlessness/guilt is mostly influenced by depressed mood). 
</p> </div> </dd> <dt> <a name='item102'>[102]</a> <a href ="/abs/2411.13802" title="Abstract" id="2411.13802"> arXiv:2411.13802 </a> [<a href="/pdf/2411.13802" title="Download PDF" id="pdf-2411.13802" aria-labelledby="pdf-2411.13802">pdf</a>, <a href="https://arxiv.org/html/2411.13802v1" title="View HTML" id="html-2411.13802" aria-labelledby="html-2411.13802" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13802" title="Other formats" id="oth-2411.13802" aria-labelledby="oth-2411.13802">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SemiKong: Curating, Training, and Evaluating A Semiconductor Industry-Specific Large Language Model </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+C">Christopher Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+W">William Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Suzuki,+A">Atsushi Suzuki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Oku,+D">Daisuke Oku</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Phan,+H+A">Hong An Phan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dinh,+S">Sang Dinh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+Z">Zooey Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ha,+A">Anh Ha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Raghavan,+S">Shruti Raghavan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vo,+H">Huy Vo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+T">Thang Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+L">Lan Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hirayama,+Y">Yoshikuni Hirayama</a></div> <div class='list-comments mathjax'><span 
class='descriptor'>Comments:</span> On-going work </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> Large Language Models (LLMs) have demonstrated the potential to address some issues within the semiconductor industry. However, they are often general-purpose models that lack the specialized knowledge needed to tackle the unique challenges of this sector, such as the intricate physics and chemistry of semiconductor devices and processes. SemiKong, the first industry-specific LLM for the semiconductor domain, provides a foundation that can be used to develop tailored proprietary models. With SemiKong 1.0, we aim to develop a foundational model capable of understanding etching problems at an expert level. Our key contributions include (a) curating a comprehensive corpus of semiconductor-related texts, (b) creating a foundational model with in-depth semiconductor knowledge, and (c) introducing a framework for integrating expert knowledge, thereby advancing the evaluation process of domain-specific AI models. Through fine-tuning a pre-trained LLM using our curated dataset, we have shown that SemiKong outperforms larger, general-purpose LLMs in various semiconductor manufacturing and design tasks. Our extensive experiments underscore the importance of developing domain-specific LLMs as a foundation for company- or tool-specific proprietary models, paving the way for further research and applications in the semiconductor domain. 
Code and dataset will be available at <a href="https://github.com/aitomatic/semikong" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item103'>[103]</a> <a href ="/abs/2411.13806" title="Abstract" id="2411.13806"> arXiv:2411.13806 </a> [<a href="/pdf/2411.13806" title="Download PDF" id="pdf-2411.13806" aria-labelledby="pdf-2411.13806">pdf</a>, <a href="https://arxiv.org/html/2411.13806v1" title="View HTML" id="html-2411.13806" aria-labelledby="html-2411.13806" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13806" title="Other formats" id="oth-2411.13806" aria-labelledby="oth-2411.13806">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Weak synchronization in heterogeneous multi-agent systems </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Stoorvogel,+A+A">Anton A. Stoorvogel</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Saberi,+A">Ali Saberi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liu,+Z">Zhenwei Liu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This paper has been submitted to IJRNC at Nov. 5, 2024 for first round review. arXiv admin note: text overlap with <a href="https://arxiv.org/abs/2403.18200" data-arxiv-id="2403.18200" class="link-https">arXiv:2403.18200</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> In this paper, we propose a new framework for synchronization of heterogeneous multi agent system which we refer to as weak synchronization. This new framework of synchronization is based on achieving the network stability in the absence of any information on communication network including the connectivity. 
Here by network stability, we mean that in the basic setup of a multi-agent system, we require that the signals exchanged over the network converge to zero. As such if the network happens to have a directed spanning tree then we obtain classical synchronization. Moreover, we design protocols which achieve weak synchronization for any network without making any kind of assumptions on communication network. If the network happens to have a directed spanning tree, then we obtain classical synchronization. However, if this is not the case then we describe in detail in this paper what kind of synchronization properties are preserved in the system and how the output of the different agents can behave. </p> </div> </dd> <dt> <a name='item104'>[104]</a> <a href ="/abs/2411.13807" title="Abstract" id="2411.13807"> arXiv:2411.13807 </a> [<a href="/pdf/2411.13807" title="Download PDF" id="pdf-2411.13807" aria-labelledby="pdf-2411.13807">pdf</a>, <a href="https://arxiv.org/html/2411.13807v1" title="View HTML" id="html-2411.13807" aria-labelledby="html-2411.13807" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13807" title="Other formats" id="oth-2411.13807" aria-labelledby="oth-2411.13807">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MagicDriveDiT: High-Resolution Long Video Generation for Autonomous Driving with Adaptive Control </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+R">Ruiyuan Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+K">Kai Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiao,+B">Bo Xiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hong,+L">Lanqing Hong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Z">Zhenguo Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+Q">Qiang Xu</a></div> <div 
class='list-comments mathjax'><span class='descriptor'>Comments:</span> Project Website: <a href="https://flymin.github.io/magicdrivedit/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> The rapid advancement of diffusion models has greatly improved video synthesis, especially in controllable video generation, which is essential for applications like autonomous driving. However, existing methods are limited by scalability and how control conditions are integrated, failing to meet the needs for high-resolution and long videos for autonomous driving applications. In this paper, we introduce MagicDriveDiT, a novel approach based on the DiT architecture, and tackle these challenges. Our method enhances scalability through flow matching and employs a progressive training strategy to manage complex scenarios. By incorporating spatial-temporal conditional encoding, MagicDriveDiT achieves precise control over spatial-temporal latents. Comprehensive experiments show its superior performance in generating realistic street scene videos with higher resolution and more frames. MagicDriveDiT significantly improves video generation quality and spatial-temporal controls, expanding its potential applications across various tasks in autonomous driving. 
</p> </div> </dd> <dt> <a name='item105'>[105]</a> <a href ="/abs/2411.13808" title="Abstract" id="2411.13808"> arXiv:2411.13808 </a> [<a href="/pdf/2411.13808" title="Download PDF" id="pdf-2411.13808" aria-labelledby="pdf-2411.13808">pdf</a>, <a href="https://arxiv.org/html/2411.13808v1" title="View HTML" id="html-2411.13808" aria-labelledby="html-2411.13808" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13808" title="Other formats" id="oth-2411.13808" aria-labelledby="oth-2411.13808">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> GPAI Evaluations Standards Taskforce: Towards Effective AI Governance </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Paskov,+P">Patricia Paskov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Berglund,+L">Lukas Berglund</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Smith,+E">Everett Smith</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Soder,+L">Lisa Soder</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computers and Society (cs.CY)</span> </div> <p class='mathjax'> General-purpose AI evaluations have been proposed as a promising way of identifying and mitigating systemic risks posed by AI development and deployment. While GPAI evaluations play an increasingly central role in institutional decision- and policy-making -- including by way of the European Union AI Act's mandate to conduct evaluations on GPAI models presenting systemic risk -- no standards exist to date to promote their quality or legitimacy. To strengthen GPAI evaluations in the EU, which currently constitutes the first and only jurisdiction that mandates GPAI evaluations, we outline four desiderata for GPAI evaluations: internal validity, external validity, reproducibility, and portability. 
To uphold these desiderata in a dynamic environment of continuously evolving risks, we propose a dedicated EU GPAI Evaluation Standards Taskforce, to be housed within the bodies established by the EU AI Act. We outline the responsibilities of the Taskforce, specify the GPAI provider commitments that would facilitate Taskforce success, discuss the potential impact of the Taskforce on global AI governance, and address potential sources of failure that policymakers should heed. </p> </div> </dd> <dt> <a name='item106'>[106]</a> <a href ="/abs/2411.13809" title="Abstract" id="2411.13809"> arXiv:2411.13809 </a> [<a href="/pdf/2411.13809" title="Download PDF" id="pdf-2411.13809" aria-labelledby="pdf-2411.13809">pdf</a>, <a href="/format/2411.13809" title="Other formats" id="oth-2411.13809" aria-labelledby="oth-2411.13809">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DCSim: Computing and Networking Integration based Container Scheduling Simulator for Data Centers </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+J">Jinlong Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rao,+Z">Zhizhe Rao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+X">Xingchen Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+L">Lihao Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dong,+S">Shoubin Dong</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Distributed, Parallel, and Cluster Computing (cs.DC)</span> </div> <p class='mathjax'> The increasing prevalence of cloud-native technologies, particularly containers, has led to the widespread adoption of containerized deployments in data centers. 
The advancement of deep neural network models has increased the demand for container-based distributed model training and inference, where frequent data transmission among nodes has emerged as a significant performance bottleneck. However, traditional container scheduling simulators often overlook the influence of network modeling on the efficiency of container scheduling, primarily concentrating on modeling computational resources. In this paper, we focus on a container scheduling simulator based on collaboration between computing and networking within data centers. We propose a new container scheduling simulator for data centers, named DCSim. The simulator consists of several modules: a data center module, a network simulation module, a container scheduling module, a discrete event-driven module, and a data collection and analysis module. Together, these modules provide heterogeneous computing power modeling and dynamic network simulation capabilities. We design a discrete event model using SimPy to represent various aspects of container processing, including container requests, scheduling, execution, pauses, communication, migration, and termination within data centers. Among these, lightweight virtualization technology based on Mininet is employed to construct a software-defined network. An experimental environment for container scheduling simulation was established, and functional and performance tests were conducted on the simulator to validate its scheduling simulation capabilities. 
</p> </div> </dd> <dt> <a name='item107'>[107]</a> <a href ="/abs/2411.13811" title="Abstract" id="2411.13811"> arXiv:2411.13811 </a> [<a href="/pdf/2411.13811" title="Download PDF" id="pdf-2411.13811" aria-labelledby="pdf-2411.13811">pdf</a>, <a href="https://arxiv.org/html/2411.13811v1" title="View HTML" id="html-2411.13811" aria-labelledby="html-2411.13811" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13811" title="Other formats" id="oth-2411.13811" aria-labelledby="oth-2411.13811">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> X-CrossNet: A complex spectral mapping approach to target speaker extraction with cross attention speaker embedding fusion </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+C">Chang Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qin,+B">Bo Qin</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Multimedia (cs.MM); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> Target speaker extraction (TSE) is a technique for isolating a target speaker's voice from mixed speech using auxiliary features associated with the target speaker. This approach addresses the cocktail party problem and is generally considered more promising for practical applications than conventional speech separation methods. Although academic research in this area has achieved high accuracy and evaluation scores on public datasets, most models exhibit significantly reduced performance in real-world noisy or reverberant conditions. To address this limitation, we propose a novel TSE model, X-CrossNet, which leverages CrossNet as its backbone. 
CrossNet is a speech separation network specifically optimized for challenging noisy and reverberant environments, achieving state-of-the-art performance in tasks such as speaker separation under these conditions. Additionally, to enhance the network's ability to capture and utilize auxiliary features of the target speaker, we integrate a Cross-Attention mechanism into the global multi-head self-attention (GMHSA) module within each CrossNet block. This facilitates more effective integration of target speaker features with mixed speech features. Experimental results show that our method performs superior separation on the WSJ0-2mix and WHAMR! datasets, demonstrating strong robustness and stability. </p> </div> </dd> <dt> <a name='item108'>[108]</a> <a href ="/abs/2411.13814" title="Abstract" id="2411.13814"> arXiv:2411.13814 </a> [<a href="/pdf/2411.13814" title="Download PDF" id="pdf-2411.13814" aria-labelledby="pdf-2411.13814">pdf</a>, <a href="https://arxiv.org/html/2411.13814v1" title="View HTML" id="html-2411.13814" aria-labelledby="html-2411.13814" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13814" title="Other formats" id="oth-2411.13814" aria-labelledby="oth-2411.13814">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> AutoMixQ: Self-Adjusting Quantization for High Performance Memory-Efficient Fine-Tuning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+C">Changhai Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+S">Shiyang Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+Y">Yuhua Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zekai Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Weng,+S">Shichao Weng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span 
class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Fine-tuning large language models (LLMs) under resource constraints is a significant challenge in deep learning. Low-Rank Adaptation (LoRA), pruning, and quantization are all effective methods for improving resource efficiency. However, combining them directly often results in suboptimal performance, especially with uniform quantization across all model layers. This is due to the complex, uneven interlayer relationships introduced by pruning, necessitating more refined quantization strategies. To address this, we propose AutoMixQ, an end-to-end optimization framework that selects optimal quantization configurations for each LLM layer. AutoMixQ leverages lightweight performance models to guide the selection process, significantly reducing time and computational resources compared to exhaustive search methods. By incorporating Pareto optimality, AutoMixQ balances memory usage and performance, approaching the upper bounds of model capability under strict resource constraints. Our experiments on widely used benchmarks show that AutoMixQ reduces memory consumption while achieving superior performance. For example, at a 30% pruning rate in LLaMA-7B, AutoMixQ achieved 66.21% on BoolQ compared to 62.45% for LoRA and 58.96% for LoftQ, while reducing memory consumption by 35.5% compared to LoRA and 27.5% compared to LoftQ. 
</p> </div> </dd> <dt> <a name='item109'>[109]</a> <a href ="/abs/2411.13817" title="Abstract" id="2411.13817"> arXiv:2411.13817 </a> [<a href="/pdf/2411.13817" title="Download PDF" id="pdf-2411.13817" aria-labelledby="pdf-2411.13817">pdf</a>, <a href="https://arxiv.org/html/2411.13817v1" title="View HTML" id="html-2411.13817" aria-labelledby="html-2411.13817" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13817" title="Other formats" id="oth-2411.13817" aria-labelledby="oth-2411.13817">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Dynamic Structural Clustering Unleashed: Flexible Similarities, Versatile Updates and for All Parameters </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Z">Zhuowei Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gan,+J">Junhao Gan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ruan,+B">Boyu Ruan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bao,+Z">Zhifeng Bao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qi,+J">Jianzhong Qi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+S">Sibo Wang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Data Structures and Algorithms (cs.DS)</span> </div> <p class='mathjax'> We study structural clustering on graphs in dynamic scenarios, where the graphs can be updated by arbitrary insertions or deletions of edges/vertices. The goal is to efficiently compute structural clustering results for any clustering parameters $\epsilon$ and $\mu$ given on the fly, for arbitrary graph update patterns, and for all typical similarity measurements. 
Specifically, we adopt the idea of update affordability and propose an a-lot-simpler yet more efficient (both theoretically and practically) algorithm (than state of the art), named VD-STAR to handle graph updates. First, with a theoretical clustering result quality guarantee, VD-STAR can output high-quality clustering results with up to 99.9% accuracy. Second, our VD-STAR is easy to implement as it just needs to maintain certain sorted linked lists and hash tables, and hence, effectively enhances its deployment in practice. Third and most importantly, by careful analysis, VD-STAR improves the per-update time bound of the state-of-the-art from $O(\log^2 n)$ expected with certain update pattern assumption to $O(\log n)$ amortized in expectation without any update pattern assumption. We further design two variants of VD-STAR to enhance its empirical performance. Experimental results show that our algorithms consistently outperform the state-of-the-art competitors by up to 9,315 times in update time across nine real datasets. 
</p> </div> </dd> <dt> <a name='item110'>[110]</a> <a href ="/abs/2411.13819" title="Abstract" id="2411.13819"> arXiv:2411.13819 </a> [<a href="/pdf/2411.13819" title="Download PDF" id="pdf-2411.13819" aria-labelledby="pdf-2411.13819">pdf</a>, <a href="https://arxiv.org/html/2411.13819v1" title="View HTML" id="html-2411.13819" aria-labelledby="html-2411.13819" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13819" title="Other formats" id="oth-2411.13819" aria-labelledby="oth-2411.13819">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Robust Steganography with Boundary-Preserving Overflow Alleviation and Adaptive Error Correction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+Y">Yu Cheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+Z">Zhenlin Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+Z">Zhaoxia Yin</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Multimedia (cs.MM) </div> <p class='mathjax'> With the rapid evolution of the Internet, the vast amount of data has created opportunities for fostering the development of steganographic techniques. However, traditional steganographic techniques encounter challenges due to distortions in online social networks, such as JPEG recompression. Presently, research into the lossy operations of spatial truncation in JPEG recompression remains limited. Existing methods aim to ensure the stability of the quantized coefficients by reducing the effects of spatial truncation. Nevertheless, these approaches may induce notable alterations to image pixels, potentially compromising anti-steganalysis performance. 
In this study, we analyzed the overflow characteristics of spatial blocks and observed that pixel values at the boundaries of spatial blocks are more prone to overflow. Building upon this observation, we proposed a preprocessing method that performs overflow removal operations based on the actual overflow conditions of spatial blocks. After preprocessing, our algorithm enhances coefficient stability while minimizing modifications to spatial block boundaries, favoring image quality preservation. Subsequently, we employed adaptive error correction coding to reduce coding redundancy, thereby augmenting robustness and mitigating its impact on anti-steganalysis performance. The experimental results indicate that the proposed method possesses a strong embedding capacity, maintaining a high level of robustness while enhancing security. </p> </div> </dd> <dt> <a name='item111'>[111]</a> <a href ="/abs/2411.13820" title="Abstract" id="2411.13820"> arXiv:2411.13820 </a> [<a href="/pdf/2411.13820" title="Download PDF" id="pdf-2411.13820" aria-labelledby="pdf-2411.13820">pdf</a>, <a href="https://arxiv.org/html/2411.13820v1" title="View HTML" id="html-2411.13820" aria-labelledby="html-2411.13820" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13820" title="Other formats" id="oth-2411.13820" aria-labelledby="oth-2411.13820">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> InstCache: A Predictive Cache for LLM Serving </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zou,+L">Longwei Zou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+T">Tingfeng Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+K">Kai Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kong,+J">Jiangang Kong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+Y">Yangdong Deng</a></div> <div 
class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Distributed, Parallel, and Cluster Computing (cs.DC) </div> <p class='mathjax'> Large language models are revolutionizing every aspect of human life. However, the unprecedented power comes at the cost of significant computing intensity, suggesting long latency and large energy footprint. Key-Value Cache and Semantic Cache have been proposed as a solution to the above problem, but both suffer from limited scalability due to significant memory cost for each token or instruction embeddings. Motivated by the observations that most instructions are short, repetitive and predictable by LLMs, we propose to predict user-instructions by an instruction-aligned LLM and store them in a predictive cache, so-called InstCache. We introduce an instruction pre-population algorithm based on the negative log likelihood of instructions, determining the cache size with regard to the hit rate. The proposed InstCache is efficiently implemented as a hash table with minimal lookup latency for deployment. Experimental results show that InstCache can achieve up to 51.34% hit rate on LMSys dataset, which corresponds to a 2x speedup, at a memory cost of only 4.5GB. 
</p> </div> </dd> <dt> <a name='item112'>[112]</a> <a href ="/abs/2411.13821" title="Abstract" id="2411.13821"> arXiv:2411.13821 </a> [<a href="/pdf/2411.13821" title="Download PDF" id="pdf-2411.13821" aria-labelledby="pdf-2411.13821">pdf</a>, <a href="https://arxiv.org/html/2411.13821v1" title="View HTML" id="html-2411.13821" aria-labelledby="html-2411.13821" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13821" title="Other formats" id="oth-2411.13821" aria-labelledby="oth-2411.13821">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Heterophilic Graph Neural Networks Optimization with Causal Message-passing </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+B">Botao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+J">Jia Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+H">Heng Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+K">Keli Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tsung,+F">Fugee Tsung</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Machine Learning (stat.ML) </div> <p class='mathjax'> In this work, we discover that causal inference provides a promising approach to capture heterophilic message-passing in Graph Neural Network (GNN). By leveraging cause-effect analysis, we can discern heterophilic edges based on asymmetric node dependency. The learned causal structure offers more accurate relationships among nodes. To reduce the computational complexity, we introduce intervention-based causal inference in graph learning. We first simplify causal analysis on graphs by formulating it as a structural learning model and define the optimization problem within the Bayesian scheme. 
We then present an analysis of decomposing the optimization target into a consistency penalty and a structure modification based on cause-effect relations. We then estimate this target by conditional entropy and present insights into how conditional entropy quantifies the heterophily. Accordingly, we propose CausalMP, a causal message-passing discovery network for heterophilic graph learning, that iteratively learns the explicit causal structure of input graphs. We conduct extensive experiments in both heterophilic and homophilic graph settings. The results demonstrate that our model achieves superior link prediction performance. Training on causal structure can also enhance node representation in classification tasks across different base models. </p> </div> </dd> <dt> <a name='item113'>[113]</a> <a href ="/abs/2411.13826" title="Abstract" id="2411.13826"> arXiv:2411.13826 </a> [<a href="/pdf/2411.13826" title="Download PDF" id="pdf-2411.13826" aria-labelledby="pdf-2411.13826">pdf</a>, <a href="https://arxiv.org/html/2411.13826v1" title="View HTML" id="html-2411.13826" aria-labelledby="html-2411.13826" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13826" title="Other formats" id="oth-2411.13826" aria-labelledby="oth-2411.13826">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Interactive and Expressive Code-Augmented Planning with Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+A+Z">Anthony Z. 
Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+X">Xinhe Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sansom,+J">Jacob Sansom</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+Y">Yao Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Choi,+J">Jongwook Choi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sohn,+S">Sungryull Sohn</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+J">Jaekyeom Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+H">Honglak Lee</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Large Language Models (LLMs) demonstrate strong abilities in common-sense reasoning and interactive decision-making, but often struggle with complex, long-horizon planning tasks. Recent techniques have sought to structure LLM outputs using control flow and other code-adjacent techniques to improve planning performance. These techniques include using variables (to track important information) and functions (to divide complex tasks into smaller re-usable sub-tasks). However, purely code-based approaches can be error-prone and insufficient for handling ambiguous or unstructured data. To address these challenges, we propose REPL-Plan, an LLM planning approach that is fully code-expressive (it can utilize all the benefits of code) while also being dynamic (it can flexibly adapt from errors and use the LLM for fuzzy situations). In REPL-Plan, an LLM solves tasks by interacting with a Read-Eval-Print Loop (REPL), which iteratively executes and evaluates code, similar to language shells or interactive code notebooks, allowing the model to flexibly correct errors and handle tasks dynamically. 
We demonstrate that REPL-Plan achieves strong results across various planning domains compared to previous methods. </p> </div> </dd> <dt> <a name='item114'>[114]</a> <a href ="/abs/2411.13827" title="Abstract" id="2411.13827"> arXiv:2411.13827 </a> [<a href="/pdf/2411.13827" title="Download PDF" id="pdf-2411.13827" aria-labelledby="pdf-2411.13827">pdf</a>, <a href="https://arxiv.org/html/2411.13827v1" title="View HTML" id="html-2411.13827" aria-labelledby="html-2411.13827" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13827" title="Other formats" id="oth-2411.13827" aria-labelledby="oth-2411.13827">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Designing a Secure Device-to-Device File Transfer Mechanism </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Rahalkar,+C">Chaitanya Rahalkar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Virgaonkar,+A">Anushka Virgaonkar</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7 pages, 3 tables, 3 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span> </div> <p class='mathjax'> Secure, reliable, and fast transfer of files across the Internet is a problem attempted to be solved through many application-layer protocols. In this paper, we aim to design a secure, reliable, open-design, and performant file transfer protocol that is inspired by the WebRTC protocol stack. Traditionally, transferring files involves a publicly exposed (available on the public network) third-party server that serves the uploaded files to the receiver. Here, the third-party server has to bear the storage and bandwidth cost to transfer the files between the two parties. We propose a protocol that uses a relay server to relay the files from the client to the server. 
A relay server has several advantages over a regular file-hosting server. Firstly, a relay server does not retain the uploaded files; it simply relays them. Secondly, a relay server has a full-duplex communication channel and therefore the receiver is not required to wait for the sender to upload the files completely. In this paper, we study available file transfer approaches and their known flaws. We propose our idea and compare our stack with the WebRTC stack. Finally, we perform empirical analysis and benchmark our device-to-device transfer approach along with other available options including WebRTC. </p> </div> </dd> <dt> <a name='item115'>[115]</a> <a href ="/abs/2411.13834" title="Abstract" id="2411.13834"> arXiv:2411.13834 </a> [<a href="/pdf/2411.13834" title="Download PDF" id="pdf-2411.13834" aria-labelledby="pdf-2411.13834">pdf</a>, <a href="https://arxiv.org/html/2411.13834v1" title="View HTML" id="html-2411.13834" aria-labelledby="html-2411.13834" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13834" title="Other formats" id="oth-2411.13834" aria-labelledby="oth-2411.13834">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Spatiotemporal Tubes for Temporal Reach-Avoid-Stay Tasks in Unknown Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Das,+R">Ratnangshu Das</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Basu,+A">Ahan Basu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Jagtap,+P">Pushpak Jagtap</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span>; Robotics (cs.RO) </div> <p class='mathjax'> The paper considers the controller synthesis problem for general MIMO systems with unknown dynamics, aiming to fulfill the temporal reach-avoid-stay task, where the unsafe regions are 
time-dependent, and the target must be reached within a specified time frame. The primary aim of the paper is to construct the spatiotemporal tube (STT) using a sampling-based approach and thereby devise a closed-form approximation-free control strategy to ensure that system trajectory reaches the target set while avoiding time-dependent unsafe sets. The proposed scheme utilizes a novel method involving STTs to provide controllers that guarantee both system safety and reachability. In our sampling-based framework, we translate the requirements of STTs into a Robust optimization program (ROP). To address the infeasibility of ROP caused by infinite constraints, we utilize the sampling-based Scenario optimization program (SOP). Subsequently, we solve the SOP to generate the tube and closed-form controller for an unknown system, ensuring the temporal reach-avoid-stay specification. Finally, the effectiveness of the proposed approach is demonstrated through three case studies: an omnidirectional robot, a SCARA manipulator, and a magnetic levitation system. 
</p> </div> </dd> <dt> <a name='item116'>[116]</a> <a href ="/abs/2411.13836" title="Abstract" id="2411.13836"> arXiv:2411.13836 </a> [<a href="/pdf/2411.13836" title="Download PDF" id="pdf-2411.13836" aria-labelledby="pdf-2411.13836">pdf</a>, <a href="https://arxiv.org/html/2411.13836v1" title="View HTML" id="html-2411.13836" aria-labelledby="html-2411.13836" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13836" title="Other formats" id="oth-2411.13836" aria-labelledby="oth-2411.13836">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CLIPer: Hierarchically Improving Spatial Representation of CLIP for Open-Vocabulary Semantic Segmentation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+L">Lin Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+J">Jiale Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+J">Jin Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+X">Xiaoheng Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pang,+Y">Yanwei Pang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Homepage and code: <a href="https://linsun449.github.io/cliper" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Contrastive Language-Image Pre-training (CLIP) exhibits strong zero-shot classification ability on various image-level tasks, leading to the research to adapt CLIP for pixel-level open-vocabulary semantic segmentation without additional training. 
The key is to improve spatial representation of image-level CLIP, such as replacing self-attention map at last layer with self-self attention map or vision foundation model based attention map. In this paper, we present a novel hierarchical framework, named CLIPer, that hierarchically improves spatial representation of CLIP. The proposed CLIPer includes an early-layer fusion module and a fine-grained compensation module. We observe that the embeddings and attention maps at early layers can preserve spatial structural information. Inspired by this, we design the early-layer fusion module to generate segmentation map with better spatial coherence. Afterwards, we employ a fine-grained compensation module to compensate the local details using the self-attention maps of diffusion model. We conduct the experiments on seven segmentation datasets. Our proposed CLIPer achieves the state-of-the-art performance on these datasets. For instance, using ViT-L, CLIPer has the mIoU of 69.8% and 43.3% on VOC and COCO Object, outperforming ProxyCLIP by 9.2% and 4.1% respectively. 
</p> </div> </dd> <dt> <a name='item117'>[117]</a> <a href ="/abs/2411.13840" title="Abstract" id="2411.13840"> arXiv:2411.13840 </a> [<a href="/pdf/2411.13840" title="Download PDF" id="pdf-2411.13840" aria-labelledby="pdf-2411.13840">pdf</a>, <a href="https://arxiv.org/html/2411.13840v1" title="View HTML" id="html-2411.13840" aria-labelledby="html-2411.13840" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13840" title="Other formats" id="oth-2411.13840" aria-labelledby="oth-2411.13840">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Segment Anything in Light Fields for Real-Time Applications via Constrained Prompting </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Goncharov,+N">Nikolai Goncharov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dansereau,+D+G">Donald G. Dansereau</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Segmented light field images can serve as a powerful representation in many computer vision tasks exploiting geometry and appearance of objects, such as object pose tracking. In the light field domain, segmentation presents an additional objective of recognizing the same segment through all the views. Segment Anything Model 2 (SAM 2) allows producing semantically meaningful segments for monocular images and videos. However, using SAM 2 directly on light fields is highly ineffective due to unexploited constraints. In this work, we present a novel light field segmentation method that adapts SAM 2 to the light field domain without retraining or modifying the model. 
By utilizing the light field domain constraints, the method produces high quality and view-consistent light field masks, outperforming the SAM 2 video tracking baseline and working 7 times faster, with a real-time speed. We achieve this by exploiting the epipolar geometry cues to propagate the masks between the views, probing the SAM 2 latent space to estimate their occlusion, and further prompting SAM 2 for their refinement. </p> </div> </dd> <dt> <a name='item118'>[118]</a> <a href ="/abs/2411.13842" title="Abstract" id="2411.13842"> arXiv:2411.13842 </a> [<a href="/pdf/2411.13842" title="Download PDF" id="pdf-2411.13842" aria-labelledby="pdf-2411.13842">pdf</a>, <a href="https://arxiv.org/html/2411.13842v1" title="View HTML" id="html-2411.13842" aria-labelledby="html-2411.13842" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13842" title="Other formats" id="oth-2411.13842" aria-labelledby="oth-2411.13842">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Detecting Human Artifacts from Text-to-Image Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+K">Kaihong Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+L">Lingzhi Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+J">Jianming Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Despite recent advancements, text-to-image generation models often produce images containing artifacts, especially in human figures. These artifacts appear as poorly generated human bodies, including distorted, missing, or extra body parts, leading to visual inconsistencies with typical human anatomy and greatly impairing overall fidelity. 
In this study, we address this challenge by curating Human Artifact Dataset (HAD), the first large-scale dataset specifically designed to identify and localize human artifacts. HAD comprises over 37,000 images generated by several popular text-to-image models, annotated for human artifact localization. Using this dataset, we train the Human Artifact Detection Models (HADM), which can identify diverse artifact types across multiple generative domains and demonstrate strong generalization, even on images from unseen generators. Additionally, to further improve generators' perception of human structural coherence, we use the predictions from our HADM as feedback for diffusion model finetuning. Our experiments confirm a reduction in human artifacts in the resulting model. Furthermore, we showcase a novel application of our HADM in an iterative inpainting framework to correct human artifacts in arbitrary images directly, demonstrating its utility in improving image quality. Our dataset and detection models are available at: <a href="https://github.com/wangkaihong/HADM" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item119'>[119]</a> <a href ="/abs/2411.13846" title="Abstract" id="2411.13846"> arXiv:2411.13846 </a> [<a href="/pdf/2411.13846" title="Download PDF" id="pdf-2411.13846" aria-labelledby="pdf-2411.13846">pdf</a>, <a href="https://arxiv.org/html/2411.13846v1" title="View HTML" id="html-2411.13846" aria-labelledby="html-2411.13846" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13846" title="Other formats" id="oth-2411.13846" aria-labelledby="oth-2411.13846">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Exploratory Study Of Human-AI Interaction For Hindustani Music </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shikarpur,+N">Nithya Shikarpur</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+C+A">Cheng-Zhi Anna Huang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at NeurIPS Creative AI Track 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> This paper presents a study of participants interacting with and using GaMaDHaNi, a novel hierarchical generative model for Hindustani vocal contours. To explore possible use cases in human-AI interaction, we conducted a user study with three participants, each engaging with the model through three predefined interaction modes. Although this study was conducted "in the wild"- with the model unadapted for the shift from the training data to real-world interaction - we use it as a pilot to better understand the expectations, reactions, and preferences of practicing musicians when engaging with such a model. We note their challenges as (1) the lack of restrictions in model output, and (2) the incoherence of model output. 
We situate these challenges in the context of Hindustani music and aim to suggest future directions for the model design to address these gaps. </p> </div> </dd> <dt> <a name='item120'>[120]</a> <a href ="/abs/2411.13847" title="Abstract" id="2411.13847"> arXiv:2411.13847 </a> [<a href="/pdf/2411.13847" title="Download PDF" id="pdf-2411.13847" aria-labelledby="pdf-2411.13847">pdf</a>, <a href="https://arxiv.org/html/2411.13847v1" title="View HTML" id="html-2411.13847" aria-labelledby="html-2411.13847" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13847" title="Other formats" id="oth-2411.13847" aria-labelledby="oth-2411.13847">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multitask Learning for SAR Ship Detection with Gaussian-Mask Joint Segmentation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+M">Ming Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+X">Xin Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kaup,+A">André Kaup</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> IEEE Transactions on Geoscience and Remote Sensing, vol. 61, pp. 1-16, 2023, Art no. 5214516 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Detecting ships in synthetic aperture radar (SAR) images is challenging due to strong speckle noise, complex surroundings, and varying scales. This paper proposes MLDet, a multitask learning framework for SAR ship detection, consisting of object detection, speckle suppression, and target segmentation tasks. An angle classification loss with aspect ratio weighting is introduced to improve detection accuracy by addressing angular periodicity and object proportions. 
The speckle suppression task uses a dual-feature fusion attention mechanism to reduce noise and fuse shallow and denoising features, enhancing robustness. The target segmentation task, leveraging a rotated Gaussian-mask, aids the network in extracting target regions from cluttered backgrounds and improves detection efficiency with pixel-level predictions. The Gaussian-mask ensures ship centers have the highest probabilities, gradually decreasing outward under a Gaussian distribution. Additionally, a weighted rotated boxes fusion (WRBF) strategy combines multi-direction anchor predictions, filtering anchors beyond boundaries or with high overlap but low confidence. Extensive experiments on SSDD+ and HRSID datasets demonstrate the effectiveness and superiority of MLDet. </p> </div> </dd> <dt> <a name='item121'>[121]</a> <a href ="/abs/2411.13848" title="Abstract" id="2411.13848"> arXiv:2411.13848 </a> [<a href="/pdf/2411.13848" title="Download PDF" id="pdf-2411.13848" aria-labelledby="pdf-2411.13848">pdf</a>, <a href="https://arxiv.org/html/2411.13848v1" title="View HTML" id="html-2411.13848" aria-labelledby="html-2411.13848" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13848" title="Other formats" id="oth-2411.13848" aria-labelledby="oth-2411.13848">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Exact and approximate error bounds for physics-informed neural networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chantada,+A+T">Augusto T. Chantada</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Protopapas,+P">Pavlos Protopapas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bachar,+L+G">Luca Gomez Bachar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Landau,+S+J">Susana J. Landau</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sc%C3%B3ccola,+C+G">Claudia G. 
Scóccola</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages, 1 figure, accepted to NeurIPS 2024 Workshop on Machine Learning and the Physical Sciences </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Numerical Analysis (math.NA) </div> <p class='mathjax'> The use of neural networks to solve differential equations, as an alternative to traditional numerical solvers, has increased recently. However, error bounds for the obtained solutions have only been developed for certain equations. In this work, we report important progress in calculating error bounds of physics-informed neural networks (PINNs) solutions of nonlinear first-order ODEs. We give a general expression that describes the error of the solution that the PINN-based method provides for a nonlinear first-order ODE. In addition, we propose a technique to calculate an approximate bound for the general case and an exact bound for a particular case. The error bounds are computed using only the residual information and the equation structure. We apply the proposed methods to particular cases and show that they can successfully provide error bounds without relying on the numerical solution. 
</p> </div> </dd> <dt> <a name='item122'>[122]</a> <a href ="/abs/2411.13851" title="Abstract" id="2411.13851"> arXiv:2411.13851 </a> [<a href="/pdf/2411.13851" title="Download PDF" id="pdf-2411.13851" aria-labelledby="pdf-2411.13851">pdf</a>, <a href="https://arxiv.org/html/2411.13851v1" title="View HTML" id="html-2411.13851" aria-labelledby="html-2411.13851" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13851" title="Other formats" id="oth-2411.13851" aria-labelledby="oth-2411.13851">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Arm Robot: AR-Enhanced Embodied Control and Visualization for Intuitive Robot Arm Manipulation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pei,+S">Siyou Pei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+A">Alexander Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kaoshik,+R">Ronak Kaoshik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Du,+R">Ruofei Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yang Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Human-Computer Interaction (cs.HC) </div> <p class='mathjax'> Embodied interaction has been introduced to human-robot interaction (HRI) as a type of teleoperation, in which users control robot arms with bodily action via handheld controllers or haptic gloves. Embodied teleoperation has made robot control intuitive to non-technical users, but differences between humans' and robots' capabilities, e.g., ranges of motion and response time, remain challenging. In response, we present Arm Robot, an embodied robot arm teleoperation system that helps users tackle human-robot discrepancies. 
Specifically, Arm Robot (1) includes AR visualization as real-time feedback on temporal and spatial discrepancies, and (2) allows users to change observing perspectives and expand action space. We conducted a user study (N=18) to investigate the usability of the Arm Robot and learn how users perceive the embodiment. Our results show users could use Arm Robot's features to effectively control the robot arm, providing insights for continued work in embodied HRI. </p> </div> </dd> <dt> <a name='item123'>[123]</a> <a href ="/abs/2411.13852" title="Abstract" id="2411.13852"> arXiv:2411.13852 </a> [<a href="/pdf/2411.13852" title="Download PDF" id="pdf-2411.13852" aria-labelledby="pdf-2411.13852">pdf</a>, <a href="/format/2411.13852" title="Other formats" id="oth-2411.13852" aria-labelledby="oth-2411.13852">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Dealing with Synthetic Data Contamination in Online Continual Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+M">Maorong Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Michel,+N">Nicolas Michel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mao,+J">Jiafeng Mao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yamasaki,+T">Toshihiko Yamasaki</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to NeurIPS'24 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Image generation has shown remarkable results in generating high-fidelity realistic images, in particular with the advancement of diffusion-based models. 
However, the prevalence of AI-generated images may have side effects for the machine learning community that are not clearly identified. Meanwhile, the success of deep learning in computer vision is driven by the massive dataset collected on the Internet. The extensive quantity of synthetic data being added to the Internet would become an obstacle for future researchers to collect "clean" datasets without AI-generated content. Prior research has shown that using datasets contaminated by synthetic images may result in performance degradation when used for training. In this paper, we investigate the potential impact of contaminated datasets on Online Continual Learning (CL) research. We experimentally show that contaminated datasets might hinder the training of existing online CL methods. Also, we propose Entropy Selection with Real-synthetic similarity Maximization (ESRM), a method to alleviate the performance deterioration caused by synthetic images when training online CL models. Experiments show that our method can significantly alleviate performance deterioration, especially when the contamination is severe. For reproducibility, the source code of our work is available at <a href="https://github.com/maorong-wang/ESRM" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item124'>[124]</a> <a href ="/abs/2411.13854" title="Abstract" id="2411.13854"> arXiv:2411.13854 </a> [<a href="/pdf/2411.13854" title="Download PDF" id="pdf-2411.13854" aria-labelledby="pdf-2411.13854">pdf</a>, <a href="https://arxiv.org/html/2411.13854v1" title="View HTML" id="html-2411.13854" aria-labelledby="html-2411.13854" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13854" title="Other formats" id="oth-2411.13854" aria-labelledby="oth-2411.13854">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Static Reuse Profile Estimation for Array Applications </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Razzak,+A">Abdur Razzak</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barai,+A">Atanu Barai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Santhi,+N">Nandakishore Santhi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Badawy,+A+A">Abdel-Hameed A. Badawy</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted in The International Symposium on Memory Systems (MEMSYS 24), September 30 to October 03, 2024, Washington, DC, USA </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Performance (cs.PF)</span> </div> <p class='mathjax'> Reuse distance analysis is a widely recognized method for application characterization that illustrates cache locality. Although there are various techniques to calculate the reuse profile from dynamic memory traces, it is both time and space-consuming due to the requirement to collect dynamic memory traces at runtime. In contrast, static analysis reuse profile estimation is a promisingly faster approach since it is calculated at compile time without running the program or collecting memory traces. 
This work presents a static analysis technique to estimate the reuse profile of loop-based programs. For an input program, we generate a basic block-level control flow graph and the execution count by analyzing the LLVM IR of the program. We present the memory accesses of the application kernel in a compact bracketed format and use a recursive algorithm to predict the reuse distance histogram. We deploy a separate predictor that unrolls the loop(s) for smaller bounds and generates a temporary reuse distance profile for those small cases. Using these smaller profiles, the reuse profile is extrapolated for the actual loop bound(s). We use this reuse profile to predict the cache hit rate. Results show that our model can predict cache hit rates with an average accuracy of 95% relative to the dynamic reuse profile methods. </p> </div> </dd> <dt> <a name='item125'>[125]</a> <a href ="/abs/2411.13856" title="Abstract" id="2411.13856"> arXiv:2411.13856 </a> [<a href="/pdf/2411.13856" title="Download PDF" id="pdf-2411.13856" aria-labelledby="pdf-2411.13856">pdf</a>, <a href="https://arxiv.org/html/2411.13856v1" title="View HTML" id="html-2411.13856" aria-labelledby="html-2411.13856" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13856" title="Other formats" id="oth-2411.13856" aria-labelledby="oth-2411.13856">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Data-Driven Modeling and Motion Control of Heavy-Load Hydraulic Manipulators via Reversible Transformation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+D">Dexian Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yirong Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+W">Wenbo Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+B">Bo Zhou</a></div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> This work proposes a data-driven modeling and the corresponding hybrid motion control framework for unmanned and automated operation of industrial heavy-load hydraulic manipulator. Rather than the direct use of a neural network black box, we construct a reversible nonlinear model by using multilayer perceptron to approximate dynamics in the physical integrator chain system after reversible transformations. The reversible nonlinear model is trained offline using supervised learning techniques, and the data are obtained from simulations or experiments. Entire hybrid motion control framework consists of the model inversion controller that compensates for the nonlinear dynamics and proportional-derivative controller that enhances the robustness. The stability is proved with Lyapunov theory. Co-simulation and Experiments show the effectiveness of proposed modeling and hybrid control framework. With a commercial 39-ton class hydraulic excavator for motion control tasks, the root mean square error of trajectory tracking error decreases by at least 50% compared to traditional control methods. In addition, by analyzing the system model, the proposed framework can be rapidly applied to different control plants. 
</p> </div> </dd> <dt> <a name='item126'>[126]</a> <a href ="/abs/2411.13859" title="Abstract" id="2411.13859"> arXiv:2411.13859 </a> [<a href="/pdf/2411.13859" title="Download PDF" id="pdf-2411.13859" aria-labelledby="pdf-2411.13859">pdf</a>, <a href="https://arxiv.org/html/2411.13859v1" title="View HTML" id="html-2411.13859" aria-labelledby="html-2411.13859" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13859" title="Other formats" id="oth-2411.13859" aria-labelledby="oth-2411.13859">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Data-Driven Multi-step Nonlinear Model Predictive Control for Industrial Heavy Load Hydraulic Robot </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+D">Dexian Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+B">Bo Zhou</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Automating complex industrial robots requires precise nonlinear control and efficient energy management. This paper introduces a data-driven nonlinear model predictive control (NMPC) framework to optimize control under multiple objectives. To enhance the prediction accuracy of the dynamic model, we design a single-shot multi-step prediction (SSMP) model based on long short-term memory (LSTM) and multilayer perceptrons (MLP), which can directly obtain the predictive horizon without iterative repetition and reduce computational pressure. Moreover, we combine offline and online models to address disturbances stemming from environmental interactions, similar to the superposition of the robot's free and forced responses. The online model learns the system's variations from the prediction mismatches of the offline model and updates its weights in real time. 
The proposed hybrid predictive model simplifies the relationship between inputs and outputs into matrix multiplication, which can quickly obtain the derivative. Therefore, the solution for the control signal sequence employs a gradient descent method with an adaptive learning rate, allowing the NMPC cost function to be formulated as a convex function incorporating critical states. The learning rate is dynamically adjusted based on state errors to counteract the inherent prediction inaccuracies of neural networks. The controller outputs the average value of the control signal sequence instead of the first value. Simulations and experiments on a 22-ton hydraulic excavator have validated the effectiveness of our method, showing that the proposed NMPC approach can be widely applied to industrial systems, including nonlinear control and energy management. </p> </div> </dd> <dt> <a name='item127'>[127]</a> <a href ="/abs/2411.13860" title="Abstract" id="2411.13860"> arXiv:2411.13860 </a> [<a href="/pdf/2411.13860" title="Download PDF" id="pdf-2411.13860" aria-labelledby="pdf-2411.13860">pdf</a>, <a href="https://arxiv.org/html/2411.13860v1" title="View HTML" id="html-2411.13860" aria-labelledby="html-2411.13860" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13860" title="Other formats" id="oth-2411.13860" aria-labelledby="oth-2411.13860">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Decoupled Sparse Priors Guided Diffusion Compression Model for Point Clouds </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+X">Xiaoge Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Z">Zijie Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nasim,+M">Mehwish Nasim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Feng,+M">Mingtao Feng</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Mian,+A">Ajmal Mian</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Image and Video Processing (eess.IV) </div> <p class='mathjax'> Lossy compression methods rely on an autoencoder to transform a point cloud into latent points for storage, leaving the inherent redundancy of latent representations unexplored. To reduce redundancy in latent points, we propose a sparse priors guided method that achieves high reconstruction quality, especially at high compression ratios. This is accomplished by a dual-density scheme separately processing the latent points (intended for reconstruction) and the decoupled sparse priors (intended for storage). Our approach features an efficient dual-density data flow that relaxes size constraints on latent points, and hybridizes a progressive conditional diffusion model to encapsulate essential details for reconstruction within the conditions, which are decoupled hierarchically to intra-point and inter-point priors. Specifically, our method encodes the original point cloud into latent points and decoupled sparse priors through separate encoders. Latent points serve as intermediates, while sparse priors act as adaptive conditions. We then employ a progressive attention-based conditional denoiser to generate latent points conditioned on the decoupled priors, allowing the denoiser to dynamically attend to geometric and semantic cues from the priors at each encoding and decoding layer. Additionally, we integrate the local distribution into the arithmetic encoder and decoder to enhance local context modeling of the sparse points. The original point cloud is reconstructed through a point decoder. 
Compared to state-of-the-art, our method obtains superior rate-distortion trade-off, evidenced by extensive evaluations on the ShapeNet dataset and standard test datasets from MPEG group including 8iVFB, and Owlii. </p> </div> </dd> <dt> <a name='item128'>[128]</a> <a href ="/abs/2411.13861" title="Abstract" id="2411.13861"> arXiv:2411.13861 </a> [<a href="/pdf/2411.13861" title="Download PDF" id="pdf-2411.13861" aria-labelledby="pdf-2411.13861">pdf</a>, <a href="https://arxiv.org/html/2411.13861v1" title="View HTML" id="html-2411.13861" aria-labelledby="html-2411.13861" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13861" title="Other formats" id="oth-2411.13861" aria-labelledby="oth-2411.13861">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Asynchronous Federated Learning Using Outdated Local Updates Over TDMA Channel </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+J">Jaeyoung Song</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hong,+J">Jun-Pyo Hong</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Distributed, Parallel, and Cluster Computing (cs.DC)</span> </div> <p class='mathjax'> In this paper, we consider asynchronous federated learning (FL) over time-division multiple access (TDMA)-based communication networks. <br>Considering TDMA for transmitting local updates can introduce significant delays to conventional synchronous FL, where all devices start local training from a common global model. In the proposed asynchronous FL approach, we partition devices into multiple TDMA groups, enabling simultaneous local computation and communication across different groups. This enhances time efficiency at the expense of staleness of local updates. 
We derive the relationship between the staleness of local updates and the size of the TDMA group in a training round. Moreover, our convergence analysis shows that although outdated local updates hinder appropriate global model updates, asynchronous FL over the TDMA channel converges even in the presence of data heterogeneity. Notably, the analysis identifies the impact of outdated local updates on convergence rate. <br>Based on observations from our convergence rate, we refine asynchronous FL strategy by introducing an intentional delay in local training. <br>This refinement accelerates the convergence by reducing the staleness of local updates. <br>Our extensive simulation results demonstrate that asynchronous FL with the intentional delay can rapidly reduce global loss by lowering the staleness of local updates in resource-limited wireless communication networks. </p> </div> </dd> <dt> <a name='item129'>[129]</a> <a href ="/abs/2411.13865" title="Abstract" id="2411.13865"> arXiv:2411.13865 </a> [<a href="/pdf/2411.13865" title="Download PDF" id="pdf-2411.13865" aria-labelledby="pdf-2411.13865">pdf</a>, <a href="https://arxiv.org/html/2411.13865v1" title="View HTML" id="html-2411.13865" aria-labelledby="html-2411.13865" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13865" title="Other formats" id="oth-2411.13865" aria-labelledby="oth-2411.13865">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> HARec: Hyperbolic Graph-LLM Alignment for Exploration and Exploitation in Recommender Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+Q">Qiyao Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+M">Menglin Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ju,+M">Mingxuan Ju</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+T">Tong Zhao</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Shah,+N">Neil Shah</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ying,+R">Rex Ying</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Retrieval (cs.IR)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Machine Learning (cs.LG) </div> <p class='mathjax'> Modern recommendation systems often create information cocoons, limiting users' exposure to diverse content. To enhance user experience, a crucial challenge is developing systems that can balance content exploration and exploitation, allowing users to adjust their recommendation preferences. Intuitively, this balance can be achieved through a tree-structured representation, where depth search facilitates exploitation and breadth search enables exploration. However, current works face two challenges to achieve this target: (1) Euclidean methods fail to fully capture hierarchical structures and lack flexibility in balancing exploration-exploitation, while (2) hyperbolic approaches, despite better hierarchical modeling, suffer from insufficient semantic alignment due to their reliance on Euclidean text encoders. To address these challenges, we propose HARec, a hyperbolic representation learning framework that jointly aligns user-item collaborative information with textual descriptions in hyperbolic space. Our framework introduces two key technique novelty: (1) a hierarchical-aware graph-llm alignment mechanism that enables better hierarchical representation, and (2) a hyperbolic hierarchical tree structure that facilitates user-adjustable exploration-exploitation trade-offs. Extensive experiments demonstrate that HARec consistently outperforms both Euclidean and hyperbolic baselines, achieving up to 5.49% improvement in utility metrics and 11.39% increase in diversity metrics. 
</p> </div> </dd> <dt> <a name='item130'>[130]</a> <a href ="/abs/2411.13867" title="Abstract" id="2411.13867"> arXiv:2411.13867 </a> [<a href="/pdf/2411.13867" title="Download PDF" id="pdf-2411.13867" aria-labelledby="pdf-2411.13867">pdf</a>, <a href="/format/2411.13867" title="Other formats" id="oth-2411.13867" aria-labelledby="oth-2411.13867">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Generative Fuzzy System for Sequence Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+H">Hailong Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+Z">Zhaohong Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+W">Wei Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Z">Zhuangzhuang Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+G">Guanjin Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Choi,+K">Kup-sze Choi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 12 pages, 5 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Generative Models (GMs), particularly Large Language Models (LLMs), have garnered significant attention in machine learning and artificial intelligence for their ability to generate new data by learning the statistical properties of training data and creating data that resemble the original. This capability offers a wide range of applications across various domains. However, the complex structures and numerous model parameters of GMs make the input-output processes opaque, complicating the understanding and control of outputs. 
Moreover, the purely data-driven learning mechanism limits GM's ability to acquire broader knowledge. There remains substantial potential for enhancing the robustness and generalization capabilities of GMs. In this work, we introduce the fuzzy system, a classical modeling method that combines data and knowledge-driven mechanisms, to generative tasks. We propose a novel Generative Fuzzy System framework, named GenFS, which integrates the deep learning capabilities of GM with the interpretability and dual-driven mechanisms of fuzzy systems. Specifically, we propose an end-to-end GenFS-based model for sequence generation, called FuzzyS2S. A series of experimental studies were conducted on 12 datasets, covering three distinct categories of generative tasks: machine translation, code generation, and summary generation. The results demonstrate that FuzzyS2S outperforms the Transformer in terms of accuracy and fluency. Furthermore, it exhibits better performance on some datasets compared to state-of-the-art models T5 and CodeT5. 
</p> </div> </dd> <dt> <a name='item131'>[131]</a> <a href ="/abs/2411.13868" title="Abstract" id="2411.13868"> arXiv:2411.13868 </a> [<a href="/pdf/2411.13868" title="Download PDF" id="pdf-2411.13868" aria-labelledby="pdf-2411.13868">pdf</a>, <a href="https://arxiv.org/html/2411.13868v1" title="View HTML" id="html-2411.13868" aria-labelledby="html-2411.13868" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13868" title="Other formats" id="oth-2411.13868" aria-labelledby="oth-2411.13868">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Robust Detection of Watermarks for Large Language Models Under Human Edits </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xiang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ruan,+F">Feng Ruan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Huiyuan Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Long,+Q">Qi Long</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Su,+W+J">Weijie J. Su</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computation and Language (cs.CL); Statistics Theory (math.ST); Methodology (stat.ME); Machine Learning (stat.ML) </div> <p class='mathjax'> Watermarking has offered an effective approach to distinguishing text generated by large language models (LLMs) from human-written text. However, the pervasive presence of human edits on LLM-generated text dilutes watermark signals, thereby significantly degrading detection performance of existing methods. In this paper, by modeling human edits through mixture model detection, we introduce a new method in the form of a truncated goodness-of-fit test for detecting watermarked text under human edits, which we refer to as Tr-GoF. 
We prove that the Tr-GoF test achieves optimality in robust detection of the Gumbel-max watermark in a certain asymptotic regime of substantial text modifications and vanishing watermark signals. Importantly, Tr-GoF achieves this optimality <em>adaptively</em> as it does not require precise knowledge of human edit levels or probabilistic specifications of the LLMs, in contrast to the optimal but impractical (Neyman–Pearson) likelihood ratio test. Moreover, we establish that the Tr-GoF test attains the highest detection efficiency rate in a certain regime of moderate text modifications. In stark contrast, we show that sum-based detection rules, as employed by existing methods, fail to achieve optimal robustness in both regimes because the additive nature of their statistics is less resilient to edit-induced noise. Finally, we demonstrate the competitive and sometimes superior empirical performance of the Tr-GoF test on both synthetic data and open-source LLMs in the OPT and LLaMA families. 
</p> </div> </dd> <dt> <a name='item132'>[132]</a> <a href ="/abs/2411.13873" title="Abstract" id="2411.13873"> arXiv:2411.13873 </a> [<a href="/pdf/2411.13873" title="Download PDF" id="pdf-2411.13873" aria-labelledby="pdf-2411.13873">pdf</a>, <a href="https://arxiv.org/html/2411.13873v1" title="View HTML" id="html-2411.13873" aria-labelledby="html-2411.13873" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13873" title="Other formats" id="oth-2411.13873" aria-labelledby="oth-2411.13873">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sli2Vol+: Segmenting 3D Medical Images Based on an Object Estimation Guided Correspondence Flow Network </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=An,+D">Delin An</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gu,+P">Pengfei Gu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sonka,+M">Milan Sonka</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+C">Chaoli Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+D+Z">Danny Z. Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Deep learning (DL) methods have shown remarkable successes in medical image segmentation, often using large amounts of annotated data for model training. However, acquiring a large number of diverse labeled 3D medical image datasets is highly difficult and expensive. Recently, mask propagation DL methods were developed to reduce the annotation burden on 3D medical images. 
For example, Sli2Vol proposed a self-supervised framework (SSF) to learn correspondences by matching neighboring slices via slice reconstruction in the training stage; the learned correspondences were then used to propagate a labeled slice to other slices in the test stage. But, these methods are still prone to error accumulation due to the inter-slice propagation of reconstruction errors. Also, they do not handle discontinuities well, which can occur between consecutive slices in 3D images, as they emphasize exploiting object continuity. To address these challenges, in this work, we propose a new SSF, called Sli2Vol+, for segmenting any anatomical structures in 3D medical images using only a single annotated slice per training and testing volume. Specifically, in the training stage, we first propagate an annotated 2D slice of a training volume to the other slices, generating pseudo-labels (PLs). Then, we develop a novel Object Estimation Guided Correspondence Flow Network to learn reliable correspondences between consecutive slices and corresponding PLs in a self-supervised manner. In the test stage, such correspondences are utilized to propagate a single annotated slice to the other slices of a test volume. We demonstrate the effectiveness of our method on various medical image segmentation tasks with different datasets, showing better generalizability across different organs, modalities, and modals. 
Code is available at <a href="https://github.com/adlsn/Sli2Volplus" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item133'>[133]</a> <a href ="/abs/2411.13874" title="Abstract" id="2411.13874"> arXiv:2411.13874 </a> [<a href="/pdf/2411.13874" title="Download PDF" id="pdf-2411.13874" aria-labelledby="pdf-2411.13874">pdf</a>, <a href="https://arxiv.org/html/2411.13874v1" title="View HTML" id="html-2411.13874" aria-labelledby="html-2411.13874" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13874" title="Other formats" id="oth-2411.13874" aria-labelledby="oth-2411.13874">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Next-Generation Phishing: How LLM Agents Empower Cyber Attackers </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Afane,+K">Khalifa Afane</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+W">Wenqi Wei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mao,+Y">Ying Mao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Farooq,+J">Junaid Farooq</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Juntao Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The escalating threat of phishing emails has become increasingly sophisticated with the rise of Large Language Models (LLMs). As attackers exploit LLMs to craft more convincing and evasive phishing emails, it is crucial to assess the resilience of current phishing defenses. 
In this study we conduct a comprehensive evaluation of traditional phishing detectors, such as Gmail Spam Filter, Apache SpamAssassin, and Proofpoint, as well as machine learning models like SVM, Logistic Regression, and Naive Bayes, in identifying both traditional and LLM-rephrased phishing emails. We also explore the emerging role of LLMs as phishing detection tools, a method already adopted by companies like NTT Security Holdings and JPMorgan Chase. Our results reveal notable declines in detection accuracy for rephrased emails across all detectors, highlighting critical weaknesses in current phishing defenses. As the threat landscape evolves, our findings underscore the need for stronger security controls and regulatory oversight on LLM-generated content to prevent its misuse in creating advanced phishing attacks. This study contributes to the development of more effective Cyber Threat Intelligence (CTI) by leveraging LLMs to generate diverse phishing variants that can be used for data augmentation, harnessing the power of LLMs to enhance phishing detection, and paving the way for more robust and adaptable threat detection systems. 
</p> </div> </dd> <dt> <a name='item134'>[134]</a> <a href ="/abs/2411.13876" title="Abstract" id="2411.13876"> arXiv:2411.13876 </a> [<a href="/pdf/2411.13876" title="Download PDF" id="pdf-2411.13876" aria-labelledby="pdf-2411.13876">pdf</a>, <a href="/format/2411.13876" title="Other formats" id="oth-2411.13876" aria-labelledby="oth-2411.13876">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Iterative decoding of short BCH codes and its post-processing </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+G">Guangwen Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+X">Xiao Yu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 6 pages, 6 figures, 2 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span> </div> <p class='mathjax'> Effective iterative decoding of short BCH codes faces two primary challenges: identifying an appropriate parity-check matrix and accelerating decoder convergence. To address these issues, we propose a systematic scheme to derive an optimized parity-check matrix through a heuristic approach. This involves a series of binary sum and row shift operations, resulting in a low-density, quasi-regular column weight distribution with a reduced number of shortest cycles in the underlying redundant Tanner graph. For the revised normalized min-sum decoder, we concurrently integrate three types of random permutations into the alternated messages across iterations, leading to significantly faster convergence compared to existing methods. 
Furthermore, by utilizing the iterative trajectories of failed normalized min-sum decoding, we enhance the reliability measurement of codeword bits with the assistance of a neural network model from prior work, which accommodates more failures for the post-processing of ordered statistics decoding. Additionally, we report the types of undetected errors for the design of iterative decoders for short BCH codes, which potentially challenge efforts to approach the maximum likelihood limit. Extensive simulations demonstrate that the proposed hybrid framework achieves an attractive balance between performance, latency, and complexity. </p> </div> </dd> <dt> <a name='item135'>[135]</a> <a href ="/abs/2411.13878" title="Abstract" id="2411.13878"> arXiv:2411.13878 </a> [<a href="/pdf/2411.13878" title="Download PDF" id="pdf-2411.13878" aria-labelledby="pdf-2411.13878">pdf</a>, <a href="https://arxiv.org/html/2411.13878v1" title="View HTML" id="html-2411.13878" aria-labelledby="html-2411.13878" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13878" title="Other formats" id="oth-2411.13878" aria-labelledby="oth-2411.13878">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sparse Zero Correlation Zone Arrays for Training Design in Spatial Modulation Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pai,+C">Cheng-Yu Pai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zilong Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+C">Chao-Yu Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span> </div> <p class='mathjax'> This paper presents a novel training matrix design for spatial modulation (SM) systems, by introducing a new class of two-dimensional (2D) arrays called sparse zero correlation zone 
(SZCZ) arrays. An SZCZ array is characterized by a majority of zero entries and exhibits the zero periodic auto- and cross-correlation zone properties across any two rows. With these unique properties, we show that SZCZ arrays can be effectively used as training matrices for SM systems. Additionally, direct constructions of SZCZ arrays with large ZCZ widths and controllable sparsity levels based on 2D restricted generalized Boolean functions (RGBFs) are proposed. Compared with existing training schemes, the proposed SZCZ-based training matrices have larger ZCZ widths, thereby offering greater tolerance for delay spread in multipath channels. Simulation results demonstrate that the proposed SZCZ-based training design exhibits superior channel estimation performance over frequency-selective fading channels compared to existing alternatives. </p> </div> </dd> <dt> <a name='item136'>[136]</a> <a href ="/abs/2411.13881" title="Abstract" id="2411.13881"> arXiv:2411.13881 </a> [<a href="/pdf/2411.13881" title="Download PDF" id="pdf-2411.13881" aria-labelledby="pdf-2411.13881">pdf</a>, <a href="https://arxiv.org/html/2411.13881v1" title="View HTML" id="html-2411.13881" aria-labelledby="html-2411.13881" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13881" title="Other formats" id="oth-2411.13881" aria-labelledby="oth-2411.13881">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Exploring applications of topological data analysis in stock index movement prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+D">Dazhi Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+P">Pengcheng Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+X">Xiaocheng Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jiayi Chen</a></div> <div class='list-comments mathjax'><span 
class='descriptor'>Comments:</span> 20 pages, 10 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Data Analysis, Statistics and Probability (physics.data-an) </div> <p class='mathjax'> Topological Data Analysis (TDA) has recently gained significant attention in the field of financial prediction. However, the choice of point cloud construction methods, topological feature representations, and classification models has a substantial impact on prediction results. This paper addresses the classification problem of stock index movement. First, we construct point clouds for stock indices using three different methods. Next, we apply TDA to extract topological structures from the point clouds. Four distinct topological features are computed to represent the patterns in the data, and 15 combinations of these features are enumerated and input into six different machine learning models. We evaluate the predictive performance of various TDA configurations by conducting index movement classification tasks on datasets such as CSI, DAX, HSI and FTSE providing insights into the efficiency of different TDA setups. 
</p> </div> </dd> <dt> <a name='item137'>[137]</a> <a href ="/abs/2411.13883" title="Abstract" id="2411.13883"> arXiv:2411.13883 </a> [<a href="/pdf/2411.13883" title="Download PDF" id="pdf-2411.13883" aria-labelledby="pdf-2411.13883">pdf</a>, <a href="https://arxiv.org/html/2411.13883v1" title="View HTML" id="html-2411.13883" aria-labelledby="html-2411.13883" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13883" title="Other formats" id="oth-2411.13883" aria-labelledby="oth-2411.13883">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> When Online Algorithms Influence the Environment: A Dynamical Systems Analysis of the Unintended Consequences </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lankireddy,+P">Prabhat Lankireddy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nair,+J">Jayakrishnan Nair</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Manjunath,+D">D Manjunath</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computers and Society (cs.CY) </div> <p class='mathjax'> We analyze the effect that online algorithms have on the environment that they are learning. As a motivation, consider recommendation systems that use online algorithms to learn optimal product recommendations based on user and product attributes. It is well known that the sequence of recommendations affects user preferences. However, typical learning algorithms treat the user attributes as static and disregard the impact of their recommendations on user preferences. 
Our interest is to analyze the effect of this mismatch between the model assumption of a static environment, and the reality of an evolving environment affected by the recommendations. To perform this analysis, we first introduce a model for a generic coupled evolution of the parameters that are being learned, and the environment that is affected by it. We then frame a linear bandit recommendation system (RS) into this generic model where the users are characterized by a state variable that evolves based on the sequence of recommendations. The learning algorithm of the RS does not explicitly account for this evolution and assumes that the users are static. A dynamical system model that captures the coupled evolution of the population state and the learning algorithm is described, and its equilibrium behavior is analyzed. We show that when the recommendation algorithm is able to learn the population preferences in the presence of this mismatch, the algorithm induces similarity in the preferences of the user population. In particular, we present results on how different properties of the recommendation algorithm, namely the user attribute space and the exploration-exploitation tradeoff, affect the population preferences when they are learned by the algorithm. We demonstrate these results using model simulations. 
</p> </div> </dd> <dt> <a name='item138'>[138]</a> <a href ="/abs/2411.13885" title="Abstract" id="2411.13885"> arXiv:2411.13885 </a> [<a href="/pdf/2411.13885" title="Download PDF" id="pdf-2411.13885" aria-labelledby="pdf-2411.13885">pdf</a>, <a href="/format/2411.13885" title="Other formats" id="oth-2411.13885" aria-labelledby="oth-2411.13885">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Trajectory Tracking Using Frenet Coordinates with Deep Deterministic Policy Gradient </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+T">Tongzhou Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+L">Lipeng Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+J">Junyue Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+T">Tianyao Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+Y">Yuhui Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+K">Kunpeng Xu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> This paper studies the application of the DDPG algorithm in trajectory-tracking tasks and proposes a trajectory-tracking control method combined with the Frenet coordinate system. By converting the vehicle's position and velocity information from the Cartesian coordinate system to the Frenet coordinate system, this method can more accurately describe the vehicle's deviation and travel distance relative to the center line of the road. The DDPG algorithm adopts the Actor-Critic framework, uses deep neural networks for strategy and value evaluation, and combines the experience replay mechanism and target network to improve the algorithm's stability and data utilization efficiency. 
Experimental results show that the DDPG algorithm based on Frenet coordinate system performs well in trajectory-tracking tasks in complex environments, achieves high-precision and stable path tracking, and demonstrates its application potential in autonomous driving and intelligent transportation systems. Keywords- DDPG; path tracking; robot navigation </p> </div> </dd> <dt> <a name='item139'>[139]</a> <a href ="/abs/2411.13886" title="Abstract" id="2411.13886"> arXiv:2411.13886 </a> [<a href="/pdf/2411.13886" title="Download PDF" id="pdf-2411.13886" aria-labelledby="pdf-2411.13886">pdf</a>, <a href="https://arxiv.org/html/2411.13886v1" title="View HTML" id="html-2411.13886" aria-labelledby="html-2411.13886" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13886" title="Other formats" id="oth-2411.13886" aria-labelledby="oth-2411.13886">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CLFace: A Scalable and Resource-Efficient Continual Learning Framework for Lifelong Face Recognition </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hasan,+M+M">Md Mahedi Hasan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sami,+S+M">Shoaib Meraj Sami</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nasrabadi,+N">Nasser Nasrabadi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted for publication in the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV 2025) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> An important aspect of deploying face recognition (FR) algorithms in real-world applications is their ability to learn new face identities from a continuous data stream. 
However, the online training of existing deep neural network-based FR algorithms, which are pre-trained offline on large-scale stationary datasets, encounters two major challenges: (I) catastrophic forgetting of previously learned identities, and (II) the need to store past data for complete retraining from scratch, leading to significant storage constraints and privacy concerns. In this paper, we introduce CLFace, a continual learning framework designed to preserve and incrementally extend the learned knowledge. CLFace eliminates the classification layer, resulting in a resource-efficient FR model that remains fixed throughout lifelong learning and provides label-free supervision to a student model, making it suitable for open-set face recognition during incremental steps. We introduce an objective function that employs feature-level distillation to reduce drift between feature maps of the student and teacher models across multiple stages. Additionally, it incorporates a geometry-preserving distillation scheme to maintain the orientation of the teacher model's feature embedding. Furthermore, a contrastive knowledge distillation is incorporated to continually enhance the discriminative power of the feature representation by matching similarities between new identities. Experiments on several benchmark FR datasets demonstrate that CLFace outperforms baseline approaches and state-of-the-art methods on unseen identities using both in-domain and out-of-domain datasets. 
</p> </div> </dd> <dt> <a name='item140'>[140]</a> <a href ="/abs/2411.13888" title="Abstract" id="2411.13888"> arXiv:2411.13888 </a> [<a href="/pdf/2411.13888" title="Download PDF" id="pdf-2411.13888" aria-labelledby="pdf-2411.13888">pdf</a>, <a href="https://arxiv.org/html/2411.13888v1" title="View HTML" id="html-2411.13888" aria-labelledby="html-2411.13888" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13888" title="Other formats" id="oth-2411.13888" aria-labelledby="oth-2411.13888">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Hierarchical Poisson Generator for Universal Graphs under Limited Resources </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Qi,+X">Xiaorui Qi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+Y">Yanlong Wen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yuan,+X">Xiaojie Yuan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, under review </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Discrete Mathematics (cs.DM)</span>; Social and Information Networks (cs.SI) </div> <p class='mathjax'> Graph generation is one of the most challenging tasks in recent years, and its core is to learn the ground truth distribution hiding in the training data. However, training data may not be available due to security concerns or unaffordable costs, which severely blows the learning models, especially the deep generative models. The dilemma leads us to rethink non-learned generation methods based on graph invariant features. Based on the observation of scale-free property, we propose a hierarchical Poisson graph generation algorithm. Specifically, we design a two-stage generation strategy. 
In the first stage, we sample multiple anchor nodes according to the Poisson distribution to further guide the formation of substructures, splitting the initial node set into multiple ones. Next, we progressively generate edges by sampling nodes through a degree mixing distribution, adjusting the tolerance towards exotic structures via two thresholds. We provide theoretical guarantees for hierarchical generation and verify the effectiveness of our method under 12 datasets of three categories. Experimental results show that our method fits the ground truth distribution better than various generation strategies and other distribution observations. </p> </div> </dd> <dt> <a name='item141'>[141]</a> <a href ="/abs/2411.13890" title="Abstract" id="2411.13890"> arXiv:2411.13890 </a> [<a href="/pdf/2411.13890" title="Download PDF" id="pdf-2411.13890" aria-labelledby="pdf-2411.13890">pdf</a>, <a href="https://arxiv.org/html/2411.13890v1" title="View HTML" id="html-2411.13890" aria-labelledby="html-2411.13890" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13890" title="Other formats" id="oth-2411.13890" aria-labelledby="oth-2411.13890">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> GraCo -- A Graph Composer for Integrated Circuits </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Uhlich,+S">Stefan Uhlich</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bonetti,+A">Andrea Bonetti</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Venkitaraman,+A">Arun Venkitaraman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Momeni,+A">Ali Momeni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Matsuo,+R">Ryoga Matsuo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hsieh,+C">Chia-Yu Hsieh</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Ohbuchi,+E">Eisaku Ohbuchi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Servadei,+L">Lorenzo Servadei</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Hardware Architecture (cs.AR) </div> <p class='mathjax'> Designing integrated circuits involves substantial complexity, posing challenges in revealing its potential applications - from custom digital cells to analog circuits. Despite extensive research over the past decades in building versatile and automated frameworks, there remains open room to explore more computationally efficient AI-based solutions. This paper introduces the graph composer GraCo, a novel method for synthesizing integrated circuits using reinforcement learning (RL). GraCo learns to construct a graph step-by-step, which is then converted into a netlist and simulated with SPICE. We demonstrate that GraCo is highly configurable, enabling the incorporation of prior design knowledge into the framework. We formalize how this prior knowledge can be utilized and, in particular, show that applying consistency checks enhances the efficiency of the sampling process. To evaluate its performance, we compare GraCo to a random baseline, which is known to perform well for smaller design space problems. We demonstrate that GraCo can discover circuits for tasks such as generating standard cells, including the inverter and the two-input NAND (NAND2) gate. Compared to a random baseline, GraCo requires 5x fewer sampling steps to design an inverter and successfully synthesizes a NAND2 gate that is 2.5x faster. 
</p> </div> </dd> <dt> <a name='item142'>[142]</a> <a href ="/abs/2411.13892" title="Abstract" id="2411.13892"> arXiv:2411.13892 </a> [<a href="/pdf/2411.13892" title="Download PDF" id="pdf-2411.13892" aria-labelledby="pdf-2411.13892">pdf</a>, <a href="https://arxiv.org/html/2411.13892v1" title="View HTML" id="html-2411.13892" aria-labelledby="html-2411.13892" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13892" title="Other formats" id="oth-2411.13892" aria-labelledby="oth-2411.13892">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Topology-Aware Popularity Debiasing via Simplicial Complexes </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ji,+Y">Yanbiao Ji</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ding,+Y">Yue Ding</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+C">Chang Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+Y">Yuxiang Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xin,+X">Xin Xin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+H">Hongtao Lu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Retrieval (cs.IR)</span> </div> <p class='mathjax'> Recommender systems (RS) play a critical role in delivering personalized content across various online platforms, leveraging collaborative filtering (CF) as a key technique to generate recommendations based on users' historical interaction data. Recent advancements in CF have been driven by the adoption of Graph Neural Networks (GNNs), which model user-item interactions as bipartite graphs, enabling the capture of high-order collaborative signals. 
Despite their success, GNN-based methods face significant challenges due to the inherent popularity bias in the user-item interaction graph's topology, leading to skewed recommendations that favor popular items over less-known ones. <br>To address this challenge, we propose a novel topology-aware popularity debiasing framework, Test-time Simplicial Propagation (TSP), which incorporates simplicial complexes (SCs) to enhance the expressiveness of GNNs. Unlike traditional methods that focus on pairwise relationships, our approach captures multi-order relationships through SCs, providing a more comprehensive representation of user-item interactions. By enriching the neighborhoods of tail items and leveraging SCs for feature smoothing, TSP enables the propagation of multi-order collaborative signals and effectively mitigates biased propagation. <br>Our TSP module is designed as a plug-and-play solution, allowing for seamless integration into pre-trained GNN-based models without the need for fine-tuning additional parameters. Extensive experiments on five real-world datasets demonstrate the superior performance of our method, particularly in long-tail recommendation tasks. Visualization results further confirm that TSP produces more uniform distributions of item representations, leading to fairer and more accurate recommendations. 
</p> </div> </dd> <dt> <a name='item143'>[143]</a> <a href ="/abs/2411.13899" title="Abstract" id="2411.13899"> arXiv:2411.13899 </a> [<a href="/pdf/2411.13899" title="Download PDF" id="pdf-2411.13899" aria-labelledby="pdf-2411.13899">pdf</a>, <a href="https://arxiv.org/html/2411.13899v1" title="View HTML" id="html-2411.13899" aria-labelledby="html-2411.13899" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13899" title="Other formats" id="oth-2411.13899" aria-labelledby="oth-2411.13899">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Schemato -- An LLM for Netlist-to-Schematic Conversion </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Matsuo,+R">Ryoga Matsuo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Uhlich,+S">Stefan Uhlich</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Venkitaraman,+A">Arun Venkitaraman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bonetti,+A">Andrea Bonetti</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hsieh,+C">Chia-Yu Hsieh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Momeni,+A">Ali Momeni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mauch,+L">Lukas Mauch</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Capone,+A">Augusto Capone</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ohbuchi,+E">Eisaku Ohbuchi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Servadei,+L">Lorenzo Servadei</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Machine learning models are advancing circuit design, particularly in analog circuits. They typically generate netlists that lack human interpretability. 
This is a problem as human designers heavily rely on the interpretability of circuit diagrams or schematics to intuitively understand, troubleshoot, and develop designs. Hence, to integrate domain knowledge effectively, it is crucial to translate ML-generated netlists into interpretable schematics quickly and accurately. We propose Schemato, a large language model (LLM) for netlist-to-schematic conversion. In particular, we consider our approach in the two settings of converting netlists to .asc files for LTSpice and LaTeX files for CircuiTikz schematics. Experiments on our circuit dataset show that Schemato achieves up to 93% compilation success rate for the netlist-to-LaTeX conversion task, surpassing the 26% rate scored by the state-of-the-art LLMs. Furthermore, our experiments show that Schemato generates schematics with a mean structural similarity index measure that is 3x higher than the best performing LLMs, therefore closer to the reference human design. </p> </div> </dd> <dt> <a name='item144'>[144]</a> <a href ="/abs/2411.13900" title="Abstract" id="2411.13900"> arXiv:2411.13900 </a> [<a href="/pdf/2411.13900" title="Download PDF" id="pdf-2411.13900" aria-labelledby="pdf-2411.13900">pdf</a>, <a href="https://arxiv.org/html/2411.13900v1" title="View HTML" id="html-2411.13900" aria-labelledby="html-2411.13900" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13900" title="Other formats" id="oth-2411.13900" aria-labelledby="oth-2411.13900">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Dissecting Conditional Branch Predictors of Apple Firestorm and Qualcomm Oryon for Software Optimization and Architectural Analysis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jiajie Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qu,+P">Peng Qu</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Youhui Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Hardware Architecture (cs.AR)</span> </div> <p class='mathjax'> Branch predictor (BP) is a critical component of modern processors, and its accurate modeling is essential for compilers and applications. However, processor vendors have disclosed limited details about their BP implementations. <br>Recent advancements in reverse engineering the BP of general-purpose processors have enabled the creation of more accurate BP models. Nonetheless, we have identified critical deficiencies in the existing methods. For instance, they impose strong assumptions on the branch history update function and the index/tag functions of key BP components, limiting their applicability to a broader range of processors, including those from Apple and Qualcomm. <br>In this paper, we design a more general branch prediction reverse engineering pipeline that can additionally recover the conditional branch predictors (CBPs) of Apple Firestorm and Qualcomm Oryon microarchitectures, and subsequently build accurate CBP models. Leveraging these models, we uncover two previously undisclosed effects that impair branch prediction accuracy and propose related solutions, resulting in up to 14% MPKI reduction and 7% performance improvement in representative applications. Furthermore, we conduct a comprehensive comparison of the known Intel/Apple/Qualcomm CBPs using a unified standalone branch predictor simulator, which facilitates a deeper understanding of CBP behavior. 
</p> </div> </dd> <dt> <a name='item145'>[145]</a> <a href ="/abs/2411.13901" title="Abstract" id="2411.13901"> arXiv:2411.13901 </a> [<a href="/pdf/2411.13901" title="Download PDF" id="pdf-2411.13901" aria-labelledby="pdf-2411.13901">pdf</a>, <a href="/format/2411.13901" title="Other formats" id="oth-2411.13901" aria-labelledby="oth-2411.13901">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Dressing the Imagination: A Dataset for AI-Powered Translation of Text into Fashion Outfits and A Novel KAN Adapter for Enhanced Feature Adaptation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Deshmukh,+G">Gayatri Deshmukh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=De,+S">Somsubhra De</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sehgal,+C">Chirag Sehgal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gupta,+J+S">Jishu Sen Gupta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mittal,+S">Sparsh Mittal</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Under review at a conference </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Specialized datasets that capture the fashion industry's rich language and styling elements can boost progress in AI-driven fashion design. We present FLORA (Fashion Language Outfit Representation for Apparel Generation), the first comprehensive dataset containing 4,330 curated pairs of fashion outfits and corresponding textual descriptions. Each description utilizes industry-specific terminology and jargon commonly used by professional fashion designers, providing precise and detailed insights into the outfits. 
Hence, the dataset captures the delicate features and subtle stylistic elements necessary to create high-fidelity fashion designs. We demonstrate that fine-tuning generative models on the FLORA dataset significantly enhances their capability to generate accurate and stylistically rich images from textual descriptions of fashion sketches. FLORA will catalyze the creation of advanced AI models capable of comprehending and producing subtle, stylistically rich fashion designs. It will also help fashion designers and end-users to bring their ideas to life. <br>As a second orthogonal contribution, we introduce KAN Adapters, which leverage Kolmogorov-Arnold Networks (KAN) as adaptive modules. They serve as replacements for traditional MLP-based LoRA adapters. With learnable spline-based activations, KAN Adapters excel in modeling complex, non-linear relationships, achieving superior fidelity, faster convergence and semantic alignment. Extensive experiments and ablation studies on our proposed FLORA dataset validate the superiority of KAN Adapters over LoRA adapters. To foster further research and collaboration, we will open-source both the FLORA and our implementation code. 
</p> </div> </dd> <dt> <a name='item146'>[146]</a> <a href ="/abs/2411.13902" title="Abstract" id="2411.13902"> arXiv:2411.13902 </a> [<a href="/pdf/2411.13902" title="Download PDF" id="pdf-2411.13902" aria-labelledby="pdf-2411.13902">pdf</a>, <a href="https://arxiv.org/html/2411.13902v1" title="View HTML" id="html-2411.13902" aria-labelledby="html-2411.13902" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13902" title="Other formats" id="oth-2411.13902" aria-labelledby="oth-2411.13902">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PIORS: Personalized Intelligent Outpatient Reception based on Large Language Model with Multi-Agents Medical Scenario Simulation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bao,+Z">Zhijie Bao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Q">Qingyun Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+Y">Ying Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ye,+Z">Zhengqiang Ye</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shen,+J">Jun Shen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+S">Shirong Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peng,+J">Jiajie Peng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+X">Xuanjing Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+Z">Zhongyu Wei</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> In China, receptionist nurses face overwhelming workloads in outpatient settings, limiting their time and attention for each patient and ultimately reducing service quality. 
In this paper, we present the Personalized Intelligent Outpatient Reception System (PIORS). This system integrates an LLM-based reception nurse and a collaboration between LLM and hospital information system (HIS) into real outpatient reception setting, aiming to deliver personalized, high-quality, and efficient reception services. Additionally, to enhance the performance of LLMs in real-world healthcare scenarios, we propose a medical conversational data generation framework named Service Flow aware Medical Scenario Simulation (SFMSS), aiming to adapt the LLM to the real-world environments and PIORS settings. We evaluate the effectiveness of PIORS and SFMSS through automatic and human assessments involving 15 users and 15 clinical experts. The results demonstrate that PIORS-Nurse outperforms all baselines, including the current state-of-the-art model GPT-4o, and aligns with human preferences and clinical needs. Further details and demo can be found at <a href="https://github.com/FudanDISC/PIORS" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item147'>[147]</a> <a href ="/abs/2411.13904" title="Abstract" id="2411.13904"> arXiv:2411.13904 </a> [<a href="/pdf/2411.13904" title="Download PDF" id="pdf-2411.13904" aria-labelledby="pdf-2411.13904">pdf</a>, <a href="https://arxiv.org/html/2411.13904v1" title="View HTML" id="html-2411.13904" aria-labelledby="html-2411.13904" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13904" title="Other formats" id="oth-2411.13904" aria-labelledby="oth-2411.13904">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Full Delegation: Designing Ideal Agentic Behaviors for Travel Planning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+S">Song Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=JU,+D">Da 
JU</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cohen,+A">Andrew Cohen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mitts,+S">Sasha Mitts</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Foss,+A">Aaron Foss</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kao,+J+T">Justine T Kao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xian Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tian,+Y">Yuandong Tian</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> How are LLM-based agents used in the future? While many of the existing work on agents has focused on improving the performance of a specific family of objective and challenging tasks, in this work, we take a different perspective by thinking about full delegation: agents take over humans' routine decision-making processes and are trusted by humans to find solutions that fit people's personalized needs and are adaptive to ever-changing context. In order to achieve such a goal, the behavior of the agents, i.e., agentic behaviors, should be evaluated not only on their achievements (i.e., outcome evaluation), but also how they achieved that (i.e., procedure evaluation). For this, we propose APEC Agent Constitution, a list of criteria that an agent should follow for good agentic behaviors, including Accuracy, Proactivity, Efficiency and Credibility. To verify whether APEC aligns with human preferences, we develop APEC-Travel, a travel planning agent that proactively extracts hidden personalized needs via multi-round dialog with travelers. APEC-Travel is constructed purely from synthetic data generated by Llama3.1-405B-Instruct with a diverse set of travelers' persona to simulate rich distribution of dialogs. 
Iteratively fine-tuned to follow APEC Agent Constitution, APEC-Travel surpasses baselines by 20.7% on rule-based metrics and 9.1% on LLM-as-a-Judge scores across the constitution axes. </p> </div> </dd> <dt> <a name='item148'>[148]</a> <a href ="/abs/2411.13906" title="Abstract" id="2411.13906"> arXiv:2411.13906 </a> [<a href="/pdf/2411.13906" title="Download PDF" id="pdf-2411.13906" aria-labelledby="pdf-2411.13906">pdf</a>, <a href="/format/2411.13906" title="Other formats" id="oth-2411.13906" aria-labelledby="oth-2411.13906">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Structure-preserving model reduction of Hamiltonian systems by learning a symplectic autoencoder </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Niggl,+F">F.K.J. Niggl</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span> </div> <p class='mathjax'> Evolutionary partial differential equations play a crucial role in many areas of science and engineering. Spatial discretization of these equations leads to a system of ordinary differential equations which can then be solved by numerical time integration. Such a system is often of very high dimension, making the simulation very time consuming. One way to reduce the computational cost is to approximate the large system by a low-dimensional model using a model reduction approach. This master thesis deals with structure-preserving model reduction of Hamiltonian systems by using machine learning techniques. We discuss a nonlinear approach based on the construction of an encoder-decoder pair that minimizes the approximation error and satisfies symplectic constraints to guarantee the preservation of the structure inherent in Hamiltonian systems. More specifically, we study an autoencoder network that learns a symplectic encoder-decoder pair. 
Symplecticity poses some additional difficulties, as we need to ensure this structure in each network layer. Since these symplectic constraints are described by the (symplectic) Stiefel manifold, we use manifold optimization techniques to ensure the symplecticity of the encoder and decoder. A particular challenge is to adapt the ADAM optimizer to the manifold structure. We present a modified ADAM optimizer that works directly on the Stiefel manifold and compare it to the existing approach based on homogeneous spaces. In addition, we propose several modifications to the network and training setup that significantly improve the performance and accuracy of the autoencoder. Finally, we numerically validate the modified optimizer and different learning configurations on two Hamiltonian systems, the 1D wave equation and the sine-Gordon equation, and demonstrate the improved accuracy and computational efficiency of the presented learning algorithms. </p> </div> </dd> <dt> <a name='item149'>[149]</a> <a href ="/abs/2411.13907" title="Abstract" id="2411.13907"> arXiv:2411.13907 </a> [<a href="/pdf/2411.13907" title="Download PDF" id="pdf-2411.13907" aria-labelledby="pdf-2411.13907">pdf</a>, <a href="https://arxiv.org/html/2411.13907v1" title="View HTML" id="html-2411.13907" aria-labelledby="html-2411.13907" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13907" title="Other formats" id="oth-2411.13907" aria-labelledby="oth-2411.13907">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Split Federated Learning Over Heterogeneous Edge Devices: Algorithm and Optimization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+Y">Yunrui Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+G">Gang Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Teng,+Y">Yinglei Teng</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Cai,+D">Dunbo Cai</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Distributed, Parallel, and Cluster Computing (cs.DC); Neural and Evolutionary Computing (cs.NE) </div> <p class='mathjax'> Split Learning (SL) is a promising collaborative machine learning approach, enabling resource-constrained devices to train models without sharing raw data, while reducing computational load and preserving privacy simultaneously. However, current SL algorithms face limitations in training efficiency and suffer from prolonged latency, particularly in sequential settings, where the slowest device can bottleneck the entire process due to heterogeneous resources and frequent data exchanges between clients and servers. To address these challenges, we propose the Heterogeneous Split Federated Learning (HSFL) framework, which allows resource-constrained clients to train their personalized client-side models in parallel, utilizing different cut layers. Aiming to mitigate the impact of heterogeneous environments and accelerate the training process, we formulate a latency minimization problem that optimizes computational and transmission resources jointly. Additionally, we design a resource allocation algorithm that combines the Sample Average Approximation (SAA), Genetic Algorithm (GA), Lagrangian relaxation and Branch and Bound (B&amp;B) methods to efficiently solve this problem. Simulation results demonstrate that HSFL outperforms other frameworks in terms of both convergence rate and model accuracy on heterogeneous devices with non-iid data, while the optimization algorithm is better than other baseline methods in reducing latency. 
</p> </div> </dd> <dt> <a name='item150'>[150]</a> <a href ="/abs/2411.13908" title="Abstract" id="2411.13908"> arXiv:2411.13908 </a> [<a href="/pdf/2411.13908" title="Download PDF" id="pdf-2411.13908" aria-labelledby="pdf-2411.13908">pdf</a>, <a href="https://arxiv.org/html/2411.13908v1" title="View HTML" id="html-2411.13908" aria-labelledby="html-2411.13908" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13908" title="Other formats" id="oth-2411.13908" aria-labelledby="oth-2411.13908">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hybrid Physics-ML Modeling for Marine Vehicle Maneuvering Motions in the Presence of Environmental Disturbances </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zihao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+J">Jian Cheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+L">Liang Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hao,+L">Lizhu Hao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peng,+Y">Yan Peng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> A hybrid physics-machine learning modeling framework is proposed for the surface vehicles' maneuvering motions to address the modeling capability and stability in the presence of environmental disturbances. From a deep learning perspective, the framework is based on a variant version of residual networks with additional feature extraction. Initially, an imperfect physical model is derived and identified to capture the fundamental hydrodynamic characteristics of marine vehicles. This model is then integrated with a feedforward network through a residual block. 
Additionally, feature extraction from trigonometric transformations is employed in the machine learning component to account for the periodic influence of currents and waves. The proposed method is evaluated using real navigational data from the 'JH7500' unmanned surface vehicle. The results demonstrate the robust generalizability and accurate long-term prediction capabilities of the nonlinear dynamic model in specific environmental conditions. This approach has the potential to be extended and applied to develop a comprehensive high-fidelity simulator. </p> </div> </dd> <dt> <a name='item151'>[151]</a> <a href ="/abs/2411.13909" title="Abstract" id="2411.13909"> arXiv:2411.13909 </a> [<a href="/pdf/2411.13909" title="Download PDF" id="pdf-2411.13909" aria-labelledby="pdf-2411.13909">pdf</a>, <a href="https://arxiv.org/html/2411.13909v1" title="View HTML" id="html-2411.13909" aria-labelledby="html-2411.13909" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13909" title="Other formats" id="oth-2411.13909" aria-labelledby="oth-2411.13909">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Panther: Illuminate the Sight of Multimodal LLMs with Instruction-Guided Visual Prompts </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+H">Honglin Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+Y">Yuting Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+C">Chenglu Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jingdong Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+M">Ming Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+L">Lin Yang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p 
class='mathjax'> Multimodal large language models (MLLMs) are closing the gap to human visual perception capability rapidly, while still lagging behind on attending to subtle image details or locating small objects precisely, etc. Common schemes to tackle these issues include deploying multiple vision encoders or operating on original high-resolution images. Few studies have concentrated on taking the textual instruction into improving visual representation, resulting in losing focus in some vision-centric tasks, a phenomenon we herein term Amblyopia. In this work, we introduce Panther, an MLLM that closely adheres to user instruction and locates targets of interest precisely, with the finesse of a black panther. Specifically, Panther comprises three integral components: Panther-VE, Panther-Bridge, and Panther-Decoder. Panther-VE integrates user instruction information at the early stages of the vision encoder, thereby extracting the most relevant and useful visual representations. The Panther-Bridge module, equipped with powerful filtering capabilities, significantly reduces redundant visual information, leading to substantial savings in training costs. The Panther-Decoder is versatile and can be employed with any decoder-only architecture of LLMs without discrimination. Experimental results, particularly on vision-centric benchmarks, have demonstrated the effectiveness of Panther. 
</p> </div> </dd> <dt> <a name='item152'>[152]</a> <a href ="/abs/2411.13913" title="Abstract" id="2411.13913"> arXiv:2411.13913 </a> [<a href="/pdf/2411.13913" title="Download PDF" id="pdf-2411.13913" aria-labelledby="pdf-2411.13913">pdf</a>, <a href="https://arxiv.org/html/2411.13913v1" title="View HTML" id="html-2411.13913" aria-labelledby="html-2411.13913" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13913" title="Other formats" id="oth-2411.13913" aria-labelledby="oth-2411.13913">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Generalizing subdiffusive Black-Scholes model by variable exponent: Model transformation and numerical approximation </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Zhang,+M">Meihui Zhang</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Liu,+M">Mengmeng Liu</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Qiu,+W">Wenlin Qiu</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Zheng,+X">Xiangcheng Zheng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span> </div> <p class='mathjax'> This work generalizes the subdiffusive Black-Scholes model by introducing the variable exponent in order to provide adequate descriptions for the option pricing, where the variable exponent may account for the variation of the memory property. In addition to standard nonlinear-to-linear transformation, we apply a further spatial-temporal transformation to convert the model to a more tractable form in order to circumvent the difficulties caused by the “non-positive, non-monotonic” variable-exponent memory kernel. 
An interesting phenomenon is that the spatial transformation not only eliminates the advection term but naturally turns the original noncoercive spatial operator into a coercive one due to the specific structure of the Black-Scholes model, which thus avoids imposing constraints on coefficients. Then we perform numerical analysis for both the semi-discrete and fully discrete schemes to support numerical simulation. Numerical experiments are carried out to substantiate the theoretical results. </p> </div> </dd> <dt> <a name='item153'>[153]</a> <a href ="/abs/2411.13914" title="Abstract" id="2411.13914"> arXiv:2411.13914 </a> [<a href="/pdf/2411.13914" title="Download PDF" id="pdf-2411.13914" aria-labelledby="pdf-2411.13914">pdf</a>, <a href="https://arxiv.org/html/2411.13914v1" title="View HTML" id="html-2411.13914" aria-labelledby="html-2411.13914" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13914" title="Other formats" id="oth-2411.13914" aria-labelledby="oth-2411.13914">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ICODE: Modeling Dynamical Systems with Extrinsic Input Information </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Z">Zhaoyi Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mei,+W">Wenjie Mei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+K">Ke Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bai,+Y">Yang Bai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+S">Shihua Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Learning models of dynamical systems with external inputs, that may be, for example, nonsmooth or piecewise, is crucial for studying complex phenomena and predicting future state evolution, which 
is essential for applications such as safety guarantees and decision-making. In this work, we introduce <em>Input Concomitant Neural ODEs (ICODEs)</em>, which incorporate precise real-time input information into the learning process of the models, rather than treating the inputs as hidden parameters to be learned. The sufficient conditions to ensure the model's contraction property are provided to guarantee that system trajectories of the trained model converge to a fixed point, regardless of initial conditions across different training processes. We validate our method through experiments on several representative real dynamics: Single-link robot, DC-to-DC converter, motion dynamics of a rigid body, Rabinovich-Fabrikant equation, Glycolytic-glycogenolytic pathway model, and heat conduction equation. The experimental results demonstrate that our proposed ICODEs efficiently learn the ground truth systems, achieving superior prediction performance under both typical and atypical inputs. This work offers a valuable class of neural ODE models for understanding physical systems with explicit external input information, with potential promising applications in fields such as physics and robotics. 
</p> </div> </dd> <dt> <a name='item154'>[154]</a> <a href ="/abs/2411.13916" title="Abstract" id="2411.13916"> arXiv:2411.13916 </a> [<a href="/pdf/2411.13916" title="Download PDF" id="pdf-2411.13916" aria-labelledby="pdf-2411.13916">pdf</a>, <a href="https://arxiv.org/html/2411.13916v1" title="View HTML" id="html-2411.13916" aria-labelledby="html-2411.13916" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13916" title="Other formats" id="oth-2411.13916" aria-labelledby="oth-2411.13916">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Joint-repositionable Inner-wireless Planar Snake Robot </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kanada,+A">Ayato Kanada</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Takahashi,+R">Ryo Takahashi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hayashi,+K">Keito Hayashi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hosaka,+R">Ryusuke Hosaka</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yukita,+W">Wakako Yukita</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nakashima,+Y">Yasutaka Nakashima</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yokota,+T">Tomoyuki Yokota</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Someya,+T">Takao Someya</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kamezaki,+M">Mitsuhiro Kamezaki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kawahara,+Y">Yoshihiro Kawahara</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yamamoto,+M">Motoji Yamamoto</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Systems and Control (eess.SY) </div> <p class='mathjax'> Bio-inspired multi-joint snake robots offer the advantages of 
terrain adaptability due to their limbless structure and high flexibility. However, a series of dozens of motor units in typical multiple-joint snake robots results in a heavy body structure and hundreds of watts of high power consumption. This paper presents a joint-repositionable, inner-wireless snake robot that enables multi-joint-like locomotion using a low-powered underactuated mechanism. The snake robot, consisting of a series of flexible passive links, can dynamically change its joint coupling configuration by repositioning motor-driven joint units along rack gears inside the robot. Additionally, a soft robot skin wirelessly powers the internal joint units, avoiding the risk of wire tangling and disconnection caused by the movable joint units. The combination of the joint-repositionable mechanism and the wireless-charging-enabled soft skin achieves a high degree of bending, along with a lightweight structure of 1.3 kg and energy-efficient wireless power transmission of 7.6 watts. 
</p> </div> </dd> <dt> <a name='item155'>[155]</a> <a href ="/abs/2411.13917" title="Abstract" id="2411.13917"> arXiv:2411.13917 </a> [<a href="/pdf/2411.13917" title="Download PDF" id="pdf-2411.13917" aria-labelledby="pdf-2411.13917">pdf</a>, <a href="https://arxiv.org/html/2411.13917v1" title="View HTML" id="html-2411.13917" aria-labelledby="html-2411.13917" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13917" title="Other formats" id="oth-2411.13917" aria-labelledby="oth-2411.13917">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SpikEmo: Enhancing Emotion Recognition With Spiking Temporal Dynamics in Conversations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+X">Xiaomin Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+F">Feiyang Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qiao,+Z">Ziyue Qiao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Multimedia (cs.MM)</span> </div> <p class='mathjax'> In affective computing, the task of Emotion Recognition in Conversations (ERC) has emerged as a focal area of research. The primary objective of this task is to predict emotional states within conversations by analyzing multimodal data including text, audio, and video. While existing studies have progressed in extracting and fusing representations from multimodal data, they often overlook the temporal dynamics in the data during conversations. To address this challenge, we have developed the SpikEmo framework, which is based on spiking neurons and employs a Semantic & Dynamic Two-stage Modeling approach to more precisely capture the complex temporal features of multimodal emotional data. 
Additionally, to tackle the class imbalance and emotional semantic similarity problems in the ERC tasks, we have devised an innovative combination of loss functions that significantly enhances the model's performance when dealing with ERC data characterized by long-tail distributions. Extensive experiments conducted on multiple ERC benchmark datasets demonstrate that SpikEmo significantly outperforms existing state-of-the-art methods in ERC tasks. Our code is available at <a href="https://github.com/Yu-xm/SpikEmo.git" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item156'>[156]</a> <a href ="/abs/2411.13918" title="Abstract" id="2411.13918"> arXiv:2411.13918 </a> [<a href="/pdf/2411.13918" title="Download PDF" id="pdf-2411.13918" aria-labelledby="pdf-2411.13918">pdf</a>, <a href="https://arxiv.org/html/2411.13918v1" title="View HTML" id="html-2411.13918" aria-labelledby="html-2411.13918" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13918" title="Other formats" id="oth-2411.13918" aria-labelledby="oth-2411.13918">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Quantization without Tears </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+M">Minghao Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+H">Hao Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shao,+J">Jie Shao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+J">Junjie Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+K">Ke Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+J">Jianxin Wu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Deep neural 
networks, while achieving remarkable success across diverse tasks, demand significant resources, including computation, GPU memory, bandwidth, storage, and energy. Network quantization, as a standard compression and acceleration technique, reduces storage costs and enables potential inference acceleration by discretizing network weights and activations into a finite set of integer values. However, current quantization methods are often complex and sensitive, requiring extensive task-specific hyperparameters, where even a single misconfiguration can impair model performance, limiting generality across different models and tasks. In this paper, we propose Quantization without Tears (QwT), a method that simultaneously achieves quantization speed, accuracy, simplicity, and generality. The key insight of QwT is to incorporate a lightweight additional structure into the quantized network to mitigate information loss during quantization. This structure consists solely of a small set of linear layers, keeping the method simple and efficient. More importantly, it provides a closed-form solution, allowing us to improve accuracy effortlessly under 2 minutes. Extensive experiments across various vision, language, and multimodal tasks demonstrate that QwT is both highly effective and versatile. In fact, our approach offers a robust solution for network quantization that combines simplicity, accuracy, and adaptability, which provides new insights for the design of novel quantization paradigms. 
</p> </div> </dd> <dt> <a name='item157'>[157]</a> <a href ="/abs/2411.13919" title="Abstract" id="2411.13919"> arXiv:2411.13919 </a> [<a href="/pdf/2411.13919" title="Download PDF" id="pdf-2411.13919" aria-labelledby="pdf-2411.13919">pdf</a>, <a href="https://arxiv.org/html/2411.13919v1" title="View HTML" id="html-2411.13919" aria-labelledby="html-2411.13919" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13919" title="Other formats" id="oth-2411.13919" aria-labelledby="oth-2411.13919">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Predictive Maintenance Study for High-Pressure Industrial Compressors: Hybrid Clustering Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Costa,+A">Alessandro Costa</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mastriani,+E">Emilio Mastriani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Incardona,+F">Federico Incardona</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Munari,+K">Kevin Munari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Spinello,+S">Sebastiano Spinello</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages, 9 figures, 2 tables, HICSS58 conference </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> This study introduces a predictive maintenance strategy for high pressure industrial compressors using sensor data and features derived from unsupervised clustering integrated into classification models. The goal is to enhance model accuracy and efficiency in detecting compressor failures. After data pre processing, sensitive clustering parameters were tuned to identify algorithms that best capture the dataset's temporal and operational characteristics. 
Clustering algorithms were evaluated using quality metrics like Normalized Mutual Information (NMI) and Adjusted Rand Index (ARI), selecting those most effective at distinguishing between normal and non normal conditions. These features enriched regression models, improving failure detection accuracy by 4.87 percent on average. Although training time was reduced by 22.96 percent, the decrease was not statistically significant, varying across algorithms. Cross validation and key performance metrics confirmed the benefits of clustering based features in predictive maintenance models. </p> </div> </dd> <dt> <a name='item158'>[158]</a> <a href ="/abs/2411.13921" title="Abstract" id="2411.13921"> arXiv:2411.13921 </a> [<a href="/pdf/2411.13921" title="Download PDF" id="pdf-2411.13921" aria-labelledby="pdf-2411.13921">pdf</a>, <a href="https://arxiv.org/html/2411.13921v1" title="View HTML" id="html-2411.13921" aria-labelledby="html-2411.13921" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13921" title="Other formats" id="oth-2411.13921" aria-labelledby="oth-2411.13921">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> NBMLSS: probabilistic forecasting of electricity prices via Neural Basis Models for Location Scale and Shape </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Brusaferri,+A">Alessandro Brusaferri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ramin,+D">Danial Ramin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ballarino,+A">Andrea Ballarino</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 23 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Forecasters using flexible neural networks (NN) in multi-horizon distributional regression 
setups often struggle to gain detailed insights into the underlying mechanisms that lead to the predicted feature-conditioned distribution parameters. In this work, we deploy a Neural Basis Model for Location, Scale and Shape, that blends the principled interpretability of GAMLSS with a computationally scalable shared basis decomposition, combined by linear projections supporting dedicated stepwise and parameter-wise feature shape functions aggregations. Experiments have been conducted on multiple market regions, achieving probabilistic forecasting performance comparable to that of distributional neural networks, while providing more insights into the model behavior through the learned nonlinear feature level maps to the distribution parameters across the prediction steps. </p> </div> </dd> <dt> <a name='item159'>[159]</a> <a href ="/abs/2411.13924" title="Abstract" id="2411.13924"> arXiv:2411.13924 </a> [<a href="/pdf/2411.13924" title="Download PDF" id="pdf-2411.13924" aria-labelledby="pdf-2411.13924">pdf</a>, <a href="https://arxiv.org/html/2411.13924v1" title="View HTML" id="html-2411.13924" aria-labelledby="html-2411.13924" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13924" title="Other formats" id="oth-2411.13924" aria-labelledby="oth-2411.13924">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Robust Data-Driven Predictive Control for Mixed Platoons under Noise and Attacks </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+S">Shuai Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+C">Chaoyi Chen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zheng,+H">Haotian Zheng</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+J">Jiawei Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Xu,+Q">Qing Xu</a>, <a 
href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+J">Jianqiang Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+K">Keqiang Li</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages, 7 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> Controlling mixed platoons, which consist of both connected and automated vehicles (CAVs) and human-driven vehicles (HDVs), poses significant challenges due to the uncertain and unknown human driving behaviors. Data-driven control methods offer promising solutions by leveraging available trajectory data, but their performance can be compromised by process noise and adversarial attacks. To address this issue, this paper proposes a Robust Data-EnablEd Predictive Leading Cruise Control (RDeeP-LCC) framework based on data-driven reachability analysis. The framework over-approximates system dynamics under noise and attack using a matrix zonotope set derived from data, and develops a stabilizing feedback control law. By decoupling the mixed platoon system into nominal and error components, we employ data-driven reachability sets to recursively compute error reachable sets that account for noise and attacks, and obtain tightened safety constraints of the nominal system. This leads to a robust data-driven predictive control framework, solved in a tube-based control manner. Numerical simulations and human-in-the-loop experiments validate that the RDeeP-LCC method significantly enhances the robustness of mixed platoons, improving mixed traffic stability and safety against practical noise and attacks. 
</p> </div> </dd> <dt> <a name='item160'>[160]</a> <a href ="/abs/2411.13927" title="Abstract" id="2411.13927"> arXiv:2411.13927 </a> [<a href="/pdf/2411.13927" title="Download PDF" id="pdf-2411.13927" aria-labelledby="pdf-2411.13927">pdf</a>, <a href="https://arxiv.org/html/2411.13927v1" title="View HTML" id="html-2411.13927" aria-labelledby="html-2411.13927" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13927" title="Other formats" id="oth-2411.13927" aria-labelledby="oth-2411.13927">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multimodal 3D Reasoning Segmentation with Complex Scenes </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+X">Xueying Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+L">Lewei Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shao,+L">Ling Shao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+S">Shijian Lu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> The recent development in multimodal learning has greatly advanced the research in 3D scene understanding in various real-world tasks such as embodied AI. However, most existing work shares two typical constraints: 1) they are short of reasoning ability for interaction and interpretation of human intention and 2) they focus on scenarios with single-category objects only which leads to over-simplified textual descriptions due to the neglect of multi-object scenarios and spatial relations among objects. We bridge the research gaps by proposing a 3D reasoning segmentation task for multiple objects in scenes. The task allows producing 3D segmentation masks and detailed textual explanations as enriched by 3D spatial relations among objects. 
To this end, we create ReasonSeg3D, a large-scale and high-quality benchmark that integrates 3D spatial relations with generated question-answer pairs and 3D segmentation masks. In addition, we design MORE3D, a simple yet effective method that enables multi-object 3D reasoning segmentation with user questions and textual outputs. Extensive experiments show that MORE3D excels in reasoning and segmenting complex multi-object 3D scenes, and the created ReasonSeg3D offers a valuable platform for future exploration of 3D reasoning segmentation. The dataset and code will be released. </p> </div> </dd> <dt> <a name='item161'>[161]</a> <a href ="/abs/2411.13929" title="Abstract" id="2411.13929"> arXiv:2411.13929 </a> [<a href="/pdf/2411.13929" title="Download PDF" id="pdf-2411.13929" aria-labelledby="pdf-2411.13929">pdf</a>, <a href="https://arxiv.org/html/2411.13929v1" title="View HTML" id="html-2411.13929" aria-labelledby="html-2411.13929" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13929" title="Other formats" id="oth-2411.13929" aria-labelledby="oth-2411.13929">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Transforming Engineering Diagrams: A Novel Approach for P&ID Digitization using Transformers </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=St%C3%BCrmer,+J+M">Jan Marius Stürmer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Graumann,+M">Marius Graumann</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Koch,+T">Tobias Koch</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> The digitization of complex technical systems, such as Piping and Instrumentation Diagrams (P&IDs), is crucial for efficient maintenance and operation of complex systems in hydraulic 
and process engineering. Previous approaches often rely on separate modules that analyze diagram elements individually, neglecting the diagram's overall structure. We address this limitation by proposing a novel approach that utilizes the Relationformer, a state-of-the-art deep learning architecture, to extract graphs from P&IDs. Our method leverages the ability of the Relationformer to simultaneously detect objects and their relationships in images, making it suitable for the task of graph extraction from engineering diagrams. We apply our proposed approach to both real-world and synthetically created P&ID datasets, and evaluate its effectiveness by comparing it with a modular digitization approach based on recent literature. We present PID2Graph, the first publicly accessible P&ID dataset featuring comprehensive labels for the graph structure, including symbols, nodes and their connections that is used for evaluation. To understand the effect of patching and stitching of both of the approaches, we compare values before and after merging the patches. For the real-world data, the Relationformer achieves convincing results, outperforming the modular digitization approach for edge detection by more than 25%. Our work provides a comprehensive framework for assessing the performance of P&ID digitization methods and opens up new avenues for research in this area using transformer architectures. The P&ID dataset used for evaluation will be published and publicly available upon acceptance of the paper. 
</p> </div> </dd> <dt> <a name='item162'>[162]</a> <a href ="/abs/2411.13932" title="Abstract" id="2411.13932"> arXiv:2411.13932 </a> [<a href="/pdf/2411.13932" title="Download PDF" id="pdf-2411.13932" aria-labelledby="pdf-2411.13932">pdf</a>, <a href="/format/2411.13932" title="Other formats" id="oth-2411.13932" aria-labelledby="oth-2411.13932">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> XAgents: A Framework for Interpretable Rule-Based Multi-Agents Cooperation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+H">Hailong Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gu,+M">Mingxian Gu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+R">Renhuo Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+F">Fuping Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+Z">Zhaohong Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yitang Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Multiagent Systems (cs.MA) </div> <p class='mathjax'> Extracting implicit knowledge and logical reasoning abilities from large language models (LLMs) has consistently been a significant challenge. The advancement of multi-agent systems has further enhanced the capabilities of LLMs. Inspired by the structure of multi-polar neurons (MNs), we propose the XAgents framework, an interpretable multi-agent cooperative framework based on the IF-THEN rule-based system. The IF-Parts of the rules are responsible for logical reasoning and domain membership calculation, while the THEN-Parts are comprised of domain expert agents that generate domain-specific contents. 
Following the calculation of the membership, XAgents transmits the task to the disparate domain rules, which subsequently generate the various responses. These responses are analogous to the answers provided by different experts to the same question. The final response is reached by eliminating the hallucinations and erroneous knowledge of the LLM through membership computation and semantic adversarial generation of the various domain rules. The incorporation of rule-based interpretability serves to bolster user confidence in the XAgents framework. We evaluate the efficacy of XAgents through a comparative analysis with the latest AutoAgents, in which XAgents demonstrated superior performance across three distinct datasets. We perform post-hoc interpretable studies with SHAP algorithm and case studies, proving the interpretability of XAgents in terms of input-output feature correlation and rule-based semantics. </p> </div> </dd> <dt> <a name='item163'>[163]</a> <a href ="/abs/2411.13934" title="Abstract" id="2411.13934"> arXiv:2411.13934 </a> [<a href="/pdf/2411.13934" title="Download PDF" id="pdf-2411.13934" aria-labelledby="pdf-2411.13934">pdf</a>, <a href="https://arxiv.org/html/2411.13934v1" title="View HTML" id="html-2411.13934" aria-labelledby="html-2411.13934" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13934" title="Other formats" id="oth-2411.13934" aria-labelledby="oth-2411.13934">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning to Cooperate with Humans using Generative Agents </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+Y">Yancheng Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+D">Daphne Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gupta,+A">Abhishek Gupta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Du,+S+S">Simon 
S. Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jaques,+N">Natasha Jaques</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Multiagent Systems (cs.MA) </div> <p class='mathjax'> Training agents that can coordinate zero-shot with humans is a key mission in multi-agent reinforcement learning (MARL). Current algorithms focus on training simulated human partner policies which are then used to train a Cooperator agent. The simulated human is produced either through behavior cloning over a dataset of human cooperation behavior, or by using MARL to create a population of simulated agents. However, these approaches often struggle to produce a Cooperator that can coordinate well with real humans, since the simulated humans fail to cover the diverse strategies and styles employed by people in the real world. We show <em>learning a generative model of human partners</em> can effectively address this issue. Our model learns a latent variable representation of the human that can be regarded as encoding the human's unique strategy, intention, experience, or style. This generative model can be flexibly trained from any (human or neural policy) agent interaction data. By sampling from the latent space, we can use the generative model to produce different partners to train Cooperator agents. We evaluate our method -- <b>G</b>enerative <b>A</b>gent <b>M</b>odeling for <b>M</b>ulti-agent <b>A</b>daptation (GAMMA) -- on Overcooked, a challenging cooperative cooking game that has become a standard benchmark for zero-shot coordination. We conduct an evaluation with real human teammates, and the results show that GAMMA consistently improves performance, whether the generative model is trained on simulated populations or human datasets. 
Further, we propose a method for posterior sampling from the generative model that is biased towards the human data, enabling us to efficiently improve performance with only a small amount of expensive human interaction data. </p> </div> </dd> <dt> <a name='item164'>[164]</a> <a href ="/abs/2411.13935" title="Abstract" id="2411.13935"> arXiv:2411.13935 </a> [<a href="/pdf/2411.13935" title="Download PDF" id="pdf-2411.13935" aria-labelledby="pdf-2411.13935">pdf</a>, <a href="https://arxiv.org/html/2411.13935v1" title="View HTML" id="html-2411.13935" aria-labelledby="html-2411.13935" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13935" title="Other formats" id="oth-2411.13935" aria-labelledby="oth-2411.13935">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Fast Stochastic MPC using Affine Disturbance Feedback Gains Learned Offline </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Lee,+H">Hotae Lee</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Borrelli,+F">Francesco Borrelli</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to L4DC 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> We propose a novel Stochastic Model Predictive Control (MPC) for uncertain linear systems subject to probabilistic constraints. The proposed approach leverages offline learning to extract key features of affine disturbance feedback policies, significantly reducing the computational burden of online optimization. Specifically, we employ offline data-driven sampling to learn feature components of feedback gains and approximate the chance-constrained feasible set with a specified confidence level. 
By utilizing this learned information, the online MPC problem is simplified to optimization over nominal inputs and a reduced set of learned feedback gains, ensuring computational efficiency. In a numerical example, the proposed MPC approach achieves comparable control performance in terms of Region of Attraction (ROA) and average closed-loop costs to classical MPC optimizing over disturbance feedback policies, while delivering a 10-fold improvement in computational speed. </p> </div> </dd> <dt> <a name='item165'>[165]</a> <a href ="/abs/2411.13941" title="Abstract" id="2411.13941"> arXiv:2411.13941 </a> [<a href="/pdf/2411.13941" title="Download PDF" id="pdf-2411.13941" aria-labelledby="pdf-2411.13941">pdf</a>, <a href="/format/2411.13941" title="Other formats" id="oth-2411.13941" aria-labelledby="oth-2411.13941">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> LLMs as Continuous Learners: Improving the Reproduction of Defective Code in Software Issues </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Y">Yalan Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+Y">Yingwei Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+R">Rongyu Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+B">Binhua Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+F">Fei Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gu,+X">Xiaodong Gu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yongbin Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Reproducing buggy code is the first and crucially important step in issue resolving, as it aids in identifying the underlying problems and 
validating that generated patches resolve the problem. While numerous approaches have been proposed for this task, they primarily address common, widespread errors and struggle to adapt to unique, evolving errors specific to individual code repositories. To fill this gap, we propose EvoCoder, a multi-agent continuous learning framework for issue code reproduction. EvoCoder adopts a reflection mechanism that allows the LLM to continuously learn from previously resolved problems and dynamically refine its strategies to new emerging challenges. To prevent experience bloating, EvoCoder introduces a novel hierarchical experience pool that enables the model to adaptively update common and repo-specific experiences. Our experimental results show a 20% improvement in issue reproduction rates over existing SOTA methods. Furthermore, integrating our reproduction mechanism significantly boosts the overall accuracy of the existing issue-resolving pipeline. </p> </div> </dd> <dt> <a name='item166'>[166]</a> <a href ="/abs/2411.13942" title="Abstract" id="2411.13942"> arXiv:2411.13942 </a> [<a href="/pdf/2411.13942" title="Download PDF" id="pdf-2411.13942" aria-labelledby="pdf-2411.13942">pdf</a>, <a href="https://arxiv.org/html/2411.13942v1" title="View HTML" id="html-2411.13942" aria-labelledby="html-2411.13942" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13942" title="Other formats" id="oth-2411.13942" aria-labelledby="oth-2411.13942">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Cooperative Grasping and Transportation using Multi-agent Reinforcement Learning with Ternary Force Representation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bernard-Tiong,+I">Ing-Sheng Bernard-Tiong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tsurumine,+Y">Yoshihisa Tsurumine</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Sota,+R">Ryosuke Sota</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shibata,+K">Kazuki Shibata</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Matsubara,+T">Takamitsu Matsubara</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Cooperative grasping and transportation require effective coordination to complete the task. This study focuses on the approach leveraging force-sensing feedback, where robots use sensors to detect forces applied by others on an object to achieve coordination. Unlike explicit communication, it avoids delays and interruptions; however, force-sensing is highly sensitive and prone to interference from variations in grasping environment, such as changes in grasping force, grasping pose, object size and geometry, which can interfere with force signals, subsequently undermining coordination. We propose multi-agent reinforcement learning (MARL) with ternary force representation, a force representation that maintains consistent representation against variations in grasping environment. The simulation and real-world experiments demonstrate the robustness of the proposed method to changes in grasping force, object size and geometry as well as inherent sim2real gap. 
</p> </div> </dd> <dt> <a name='item167'>[167]</a> <a href ="/abs/2411.13945" title="Abstract" id="2411.13945"> arXiv:2411.13945 </a> [<a href="/pdf/2411.13945" title="Download PDF" id="pdf-2411.13945" aria-labelledby="pdf-2411.13945">pdf</a>, <a href="https://arxiv.org/html/2411.13945v1" title="View HTML" id="html-2411.13945" aria-labelledby="html-2411.13945" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13945" title="Other formats" id="oth-2411.13945" aria-labelledby="oth-2411.13945">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Neuromorphic Attitude Estimation and Control </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Stroobants,+S">Stein Stroobants</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=de+Wagter,+C">Christophe de Wagter</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=De+Croon,+G+C">Guido C.H.E. De Croon</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Machine Learning (cs.LG); Neural and Evolutionary Computing (cs.NE) </div> <p class='mathjax'> The real-world application of small drones is mostly hampered by energy limitations. Neuromorphic computing promises extremely energy-efficient AI for autonomous flight, but is still challenging to train and deploy on real robots. In order to reap the maximal benefits from neuromorphic computing, it is desired to perform all autonomy functions end-to-end on a single neuromorphic chip, from low-level attitude control to high-level navigation. This research presents the first neuromorphic control system using a spiking neural network (SNN) to effectively map a drone's raw sensory input directly to motor commands. We apply this method to low-level attitude estimation and control for a quadrotor, deploying the SNN on a tiny Crazyflie. 
We propose a modular SNN, separately training and then merging estimation and control sub-networks. The SNN is trained with imitation learning, using a flight dataset of sensory-motor pairs. Post-training, the network is deployed on the Crazyflie, issuing control commands from sensor inputs at $500$Hz. Furthermore, for the training procedure we augmented training data by flying a controller with additional excitation and time-shifting the target data to enhance the predictive capabilities of the SNN. On the real drone the perception-to-control SNN tracks attitude commands with an average error of $3$ degrees, compared to $2.5$ degrees for the regular flight stack. We also show the benefits of the proposed learning modifications for reducing the average tracking error and reducing oscillations. Our work shows the feasibility of performing neuromorphic end-to-end control, laying the basis for highly energy-efficient and low-latency neuromorphic autopilots. </p> </div> </dd> <dt> <a name='item168'>[168]</a> <a href ="/abs/2411.13946" title="Abstract" id="2411.13946"> arXiv:2411.13946 </a> [<a href="/pdf/2411.13946" title="Download PDF" id="pdf-2411.13946" aria-labelledby="pdf-2411.13946">pdf</a>, <a href="/format/2411.13946" title="Other formats" id="oth-2411.13946" aria-labelledby="oth-2411.13946">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Systematic Literature Review on Technology Acceptance Research on Augmented Reality in the Field of Training and Education </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Graser,+S">Stefan Graser</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=B%C3%B6hm,+S">Stephan Böhm</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 7 figures, The Fifteenth International Conference on Advances in Human-oriented and Personalized Mechanisms, Technologies, and Services 
CENTRIC 2022 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span> </div> <p class='mathjax'> Augmented Reality (AR) is an emerging technology that ranks among the top innovations in interactive media. With the emergence of new technologies, the question about the factors influencing user acceptance arises. Many research models on the user acceptance of technologies were developed and extended to answer this question in the last decades. This research paper provides an overview of the current state in the scientific literature on user acceptance factors of AR in training and education. We conducted a systematic literature review, identifying 45 scientific papers on technology acceptance of augmented reality. Twenty-two papers refer more specifically to the field of training and education. Overall, 33 different technology acceptance models and 34 acceptance variables were identified. Based on the results, there is a great potential for further research. 
</p> </div> </dd> <dt> <a name='item169'>[169]</a> <a href ="/abs/2411.13949" title="Abstract" id="2411.13949"> arXiv:2411.13949 </a> [<a href="/pdf/2411.13949" title="Download PDF" id="pdf-2411.13949" aria-labelledby="pdf-2411.13949">pdf</a>, <a href="https://arxiv.org/html/2411.13949v1" title="View HTML" id="html-2411.13949" aria-labelledby="html-2411.13949" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13949" title="Other formats" id="oth-2411.13949" aria-labelledby="oth-2411.13949">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Separable Mixture of Low-Rank Adaptation for Continual Visual Instruction Tuning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Ziqi Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Che,+C">Chang Che</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Q">Qi Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yangyang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+Z">Zenglin Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+M">Meng Wang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Visual instruction tuning (VIT) enables multimodal large language models (MLLMs) to effectively handle a wide range of vision tasks by framing them as language-based instructions. Building on this, continual visual instruction tuning (CVIT) extends the capability of MLLMs to incrementally learn new tasks, accommodating evolving functionalities. 
While prior work has advanced CVIT through the development of new benchmarks and approaches to mitigate catastrophic forgetting, these efforts largely follow traditional continual learning paradigms, neglecting the unique challenges specific to CVIT. We identify a dual form of catastrophic forgetting in CVIT, where MLLMs not only forget previously learned visual understanding but also experience a decline in instruction following abilities as they acquire new tasks. To address this, we introduce the Separable Mixture of Low-Rank Adaptation (SMoLoRA) framework, which employs separable routing through two distinct modules - one for visual understanding and another for instruction following. This dual-routing design enables specialized adaptation in both domains, preventing forgetting while improving performance. Furthermore, we propose a novel CVIT benchmark that goes beyond existing benchmarks by additionally evaluating a model's ability to generalize to unseen tasks and handle diverse instructions across various tasks. Extensive experiments demonstrate that SMoLoRA outperforms existing methods in mitigating dual forgetting, improving generalization to unseen tasks, and ensuring robustness in following diverse instructions. 
</p> </div> </dd> <dt> <a name='item170'>[170]</a> <a href ="/abs/2411.13950" title="Abstract" id="2411.13950"> arXiv:2411.13950 </a> [<a href="/pdf/2411.13950" title="Download PDF" id="pdf-2411.13950" aria-labelledby="pdf-2411.13950">pdf</a>, <a href="https://arxiv.org/html/2411.13950v1" title="View HTML" id="html-2411.13950" aria-labelledby="html-2411.13950" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13950" title="Other formats" id="oth-2411.13950" aria-labelledby="oth-2411.13950">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Socio-Technical Grounded Theory on the Effect of Cognitive Dysfunctions in the Performance of Software Developers with ADHD and Autism </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gama,+K">Kiev Gama</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liebel,+G">Grischa Liebel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Goul%C3%A3o,+M">Miguel Goulão</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lacerda,+A">Aline Lacerda</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lacerda,+C">Cristiana Lacerda</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span> </div> <p class='mathjax'> The concept of neurodiversity, encompassing conditions such as Autism Spectrum Disorder (ASD), Attention-Deficit/Hyperactivity Disorder (ADHD), dyslexia, and dyspraxia, challenges traditional views of these neurodevelopmental variations as disorders and instead frames them as natural cognitive differences that contribute to unique ways of thinking and problem-solving. Within the software development industry, known for its emphasis on innovation, there is growing recognition of the value neurodivergent individuals bring to technical teams. 
Despite this, research on the contributions of neurodivergent individuals in Software Engineering (SE) remains limited. This interdisciplinary Socio-Technical Grounded Theory study addresses this gap by exploring the experiences of neurodivergent software engineers with ASD and ADHD, examining the cognitive and emotional challenges they face in software teams. Based on interviews and a survey with 25 neurodivergent and 5 neurotypical individuals, our theory describes how neurodivergent cognitive dysfunctions affect SE performance, and how the individuals' individual journey and various accommodations can regulate this effect. We conclude our paper with a list of inclusive Agile practices, allowing organizations to better support neurodivergent employees and fully leverage their capabilities. </p> </div> </dd> <dt> <a name='item171'>[171]</a> <a href ="/abs/2411.13951" title="Abstract" id="2411.13951"> arXiv:2411.13951 </a> [<a href="/pdf/2411.13951" title="Download PDF" id="pdf-2411.13951" aria-labelledby="pdf-2411.13951">pdf</a>, <a href="https://arxiv.org/html/2411.13951v1" title="View HTML" id="html-2411.13951" aria-labelledby="html-2411.13951" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13951" title="Other formats" id="oth-2411.13951" aria-labelledby="oth-2411.13951">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Dataset for Evaluating Online Anomaly Detection Approaches for Discrete Multivariate Time Series </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Correia,+L">Lucas Correia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Goos,+J">Jan-Christoph Goos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=B%C3%A4ck,+T">Thomas Bäck</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kononova,+A+V">Anna V. 
Kononova</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computational Engineering, Finance, and Science (cs.CE); Systems and Control (eess.SY) </div> <p class='mathjax'> Benchmarking anomaly detection approaches for multivariate time series is challenging due to the lack of high-quality datasets. Current publicly available datasets are too small, not diverse and feature trivial anomalies, which hinders measurable progress in this research area. We propose a solution: a diverse, extensive, and non-trivial dataset generated via state-of-the-art simulation tools that reflects realistic behaviour of an automotive powertrain, including its multivariate, dynamic and variable-state properties. To cater for both unsupervised and semi-supervised anomaly detection settings, as well as time series generation and forecasting, we make different versions of the dataset available, where training and test subsets are offered in contaminated and clean versions, depending on the task. We also provide baseline results from a small selection of approaches based on deterministic and variational autoencoders, as well as a non-parametric approach. As expected, the baseline experimentation shows that the approaches trained on the semi-supervised version of the dataset outperform their unsupervised counterparts, highlighting a need for approaches more robust to contaminated training data. 
</p> </div> </dd> <dt> <a name='item172'>[172]</a> <a href ="/abs/2411.13952" title="Abstract" id="2411.13952"> arXiv:2411.13952 </a> [<a href="/pdf/2411.13952" title="Download PDF" id="pdf-2411.13952" aria-labelledby="pdf-2411.13952">pdf</a>, <a href="https://arxiv.org/html/2411.13952v1" title="View HTML" id="html-2411.13952" aria-labelledby="html-2411.13952" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13952" title="Other formats" id="oth-2411.13952" aria-labelledby="oth-2411.13952">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning thin deformable object manipulation with a multi-sensory integrated soft hand </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+C">Chao Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+C">Chunli Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+L">Lifan Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yuan,+S">Shuai Yuan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Q">Qifeng Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+H">Hongyu Yu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 19 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Robotic manipulation has made significant advancements, with systems demonstrating high precision and repeatability. However, this remarkable precision often fails to translate into efficient manipulation of thin deformable objects. Current robotic systems lack imprecise dexterity, the ability to perform dexterous manipulation through robust and adaptive behaviors that do not rely on precise control. This paper explores the singulation and grasping of thin, deformable objects. 
Here, we propose a novel solution that incorporates passive compliance, touch, and proprioception into thin, deformable object manipulation. Our system employs a soft, underactuated hand that provides passive compliance, facilitating adaptive and gentle interactions to dexterously manipulate deformable objects without requiring precise control. The tactile and force/torque sensors equipped on the hand, along with a depth camera, gather sensory data required for manipulation via the proposed slip module. The manipulation policies are learned directly from raw sensory data via model-free reinforcement learning, bypassing explicit environmental and object modeling. We implement a hierarchical double-loop learning process to enhance learning efficiency by decoupling the action space. Our method was deployed on real-world robots and trained in a self-supervised manner. The resulting policy was tested on a variety of challenging tasks that were beyond the capabilities of prior studies, ranging from displaying suit fabric like a salesperson to turning pages of sheet music for violinists. 
</p> </div> </dd> <dt> <a name='item173'>[173]</a> <a href ="/abs/2411.13953" title="Abstract" id="2411.13953"> arXiv:2411.13953 </a> [<a href="/pdf/2411.13953" title="Download PDF" id="pdf-2411.13953" aria-labelledby="pdf-2411.13953">pdf</a>, <a href="https://arxiv.org/html/2411.13953v1" title="View HTML" id="html-2411.13953" aria-labelledby="html-2411.13953" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13953" title="Other formats" id="oth-2411.13953" aria-labelledby="oth-2411.13953">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Material synthesis through simulations guided by machine learning: a position paper </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Syed,+U">Usman Syed</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cunico,+F">Federico Cunico</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Khan,+U">Uzair Khan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Radicchi,+E">Eros Radicchi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Setti,+F">Francesco Setti</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Speghini,+A">Adolfo Speghini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Marone,+P">Paolo Marone</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Semenzin,+F">Filiberto Semenzin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cristani,+M">Marco Cristani</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> In this position paper, we propose an approach for sustainable data collection in the field of optimal mix design for marble sludge reuse. 
Marble sludge, a calcium-rich residual from stone-cutting processes, can be repurposed by mixing it with various ingredients. However, determining the optimal mix design is challenging due to the variability in sludge composition and the costly, time-consuming nature of experimental data collection. Also, we investigate the possibility of using machine learning models using meta-learning as an optimization tool to estimate the correct quantity of stone-cutting sludge to be used in aggregates to obtain a mix design with specific mechanical properties that can be used successfully in the building industry. Our approach offers two key advantages: (i) through simulations, a large dataset can be generated, saving time and money during the data collection phase, and (ii) Utilizing machine learning models, with performance enhancement through hyper-parameter optimization via meta-learning, to estimate optimal mix designs reducing the need for extensive manual experimentation, lowering costs, minimizing environmental impact, and accelerating the processing of quarry sludge. Our idea promises to streamline the marble sludge reuse process by leveraging collective data and advanced machine learning, promoting sustainability and efficiency in the stonecutting sector. 
</p> </div> </dd> <dt> <a name='item174'>[174]</a> <a href ="/abs/2411.13957" title="Abstract" id="2411.13957"> arXiv:2411.13957 </a> [<a href="/pdf/2411.13957" title="Download PDF" id="pdf-2411.13957" aria-labelledby="pdf-2411.13957">pdf</a>, <a href="https://arxiv.org/html/2411.13957v1" title="View HTML" id="html-2411.13957" aria-labelledby="html-2411.13957" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13957" title="Other formats" id="oth-2411.13957" aria-labelledby="oth-2411.13957">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Variational Multiscale Evolve and Filter Strategies for Convection-Dominated Flows </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Strazzullo,+M">Maria Strazzullo</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Ballarin,+F">Francesco Ballarin</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Iliescu,+T">Traian Iliescu</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Rebollo,+T+C">Tomás Chacón Rebollo</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span>; Fluid Dynamics (physics.flu-dyn) </div> <p class='mathjax'> The evolve-filter (EF) model is a filter-based numerical stabilization for under-resolved convection-dominated flows. EF is a simple, modular, and effective strategy for both full-order models (FOMs) and reduced-order models (ROMs). It is well-known, however, that when the filter radius is too large, EF can be overdiffusive and yield inaccurate results. To alleviate this, EF is usually supplemented with a relaxation step. The relaxation parameter, however, is very sensitive with respect to the model parameters. In this paper, we propose a novel strategy to alleviate the EF overdiffusivity for a large filter radius. 
Specifically, we leverage the variational multiscale (VMS) framework to separate the large resolved scales from the small resolved scales in the evolved velocity, and we use the filtered small scales to correct the large scales. Furthermore, in the new VMS-EF strategy, we use two different ways to decompose the evolved velocity: the VMS Evolve-Filter-Filter-Correct (VMS-EFFC) and the VMS Evolve-Postprocess-Filter-Correct (VMS-EPFC) algorithms. The new VMS-based algorithms yield significantly more accurate results than the standard EF in both the FOM and the ROM simulations of a flow past a cylinder at Reynolds number Re = 1000. </p> </div> </dd> <dt> <a name='item175'>[175]</a> <a href ="/abs/2411.13958" title="Abstract" id="2411.13958"> arXiv:2411.13958 </a> [<a href="/pdf/2411.13958" title="Download PDF" id="pdf-2411.13958" aria-labelledby="pdf-2411.13958">pdf</a>, <a href="https://arxiv.org/html/2411.13958v1" title="View HTML" id="html-2411.13958" aria-labelledby="html-2411.13958" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13958" title="Other formats" id="oth-2411.13958" aria-labelledby="oth-2411.13958">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sentiment Analysis of Economic Text: A Lexicon-Based Approach </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Barbaglia,+L">Luca Barbaglia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Consoli,+S">Sergio Consoli</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Manzan,+S">Sebastiano Manzan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pezzoli,+L+T">Luca Tiozzo Pezzoli</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tosetti,+E">Elisa Tosetti</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 37 pages, 9 figures, 6 tables, in press </div> <div 
class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Economic Inquiry, 1-19 (2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computational Engineering, Finance, and Science (cs.CE)</span>; Computation and Language (cs.CL); Computers and Society (cs.CY) </div> <p class='mathjax'> We propose an Economic Lexicon (EL) specifically designed for textual applications in economics. We construct the dictionary with two important characteristics: 1) to have a wide coverage of terms used in documents discussing economic concepts, and 2) to provide a human-annotated sentiment score in the range [-1,1]. We illustrate the use of the EL in the context of a simple sentiment measure and consider several applications in economics. The comparison to other lexicons shows that the EL is superior due to its wider coverage of domain relevant terms and its more accurate categorization of the word sentiment. </p> </div> </dd> <dt> <a name='item176'>[176]</a> <a href ="/abs/2411.13961" title="Abstract" id="2411.13961"> arXiv:2411.13961 </a> [<a href="/pdf/2411.13961" title="Download PDF" id="pdf-2411.13961" aria-labelledby="pdf-2411.13961">pdf</a>, <a href="https://arxiv.org/html/2411.13961v1" title="View HTML" id="html-2411.13961" aria-labelledby="html-2411.13961" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13961" title="Other formats" id="oth-2411.13961" aria-labelledby="oth-2411.13961">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Zero-Shot Low-Light Image Enhancement via Joint Frequency Domain Priors Guided Diffusion </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=He,+J">Jinhong He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Palaiahnakote,+S">Shivakumara Palaiahnakote</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Ning,+A">Aoxiang Ning</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xue,+M">Minglong Xue</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Due to the singularity of real-world paired datasets and the complexity of low-light environments, this leads to supervised methods lacking a degree of scene generalisation. Meanwhile, limited by poor lighting and content guidance, existing zero-shot methods cannot handle unknown severe degradation well. To address this problem, we will propose a new zero-shot low-light enhancement method to compensate for the lack of light and structural information in the diffusion sampling process by effectively combining the wavelet and Fourier frequency domains to construct rich a priori information. The key to the inspiration comes from the similarity between the wavelet and Fourier frequency domains: both light and structure information are closely related to specific frequency domain regions, respectively. Therefore, by transferring the diffusion process to the wavelet low-frequency domain and combining the wavelet and Fourier frequency domains by continuously decomposing them in the inverse process, the constructed rich illumination prior is utilised to guide the image generation enhancement process. Sufficient experiments show that the framework is robust and effective in various scenarios. The code will be available at: <a href="https://github.com/hejh8/Joint-Wavelet-and-Fourier-priors-guided-diffusion" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item177'>[177]</a> <a href ="/abs/2411.13962" title="Abstract" id="2411.13962"> arXiv:2411.13962 </a> [<a href="/pdf/2411.13962" title="Download PDF" id="pdf-2411.13962" aria-labelledby="pdf-2411.13962">pdf</a>, <a href="https://arxiv.org/html/2411.13962v1" title="View HTML" id="html-2411.13962" aria-labelledby="html-2411.13962" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13962" title="Other formats" id="oth-2411.13962" aria-labelledby="oth-2411.13962">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hybrid-Neuromorphic Approach for Underwater Robotics Applications: A Conceptual Framework </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sudevan,+V">Vidya Sudevan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zayer,+F">Fakhreddine Zayer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Javed,+S">Sajid Javed</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Karki,+H">Hamad Karki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=De+Masi,+G">Giulia De Masi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dias,+J">Jorge Dias</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> This paper introduces the concept of employing neuromorphic methodologies for task-oriented underwater robotics applications. In contrast to the increasing computational demands of conventional deep learning algorithms, neuromorphic technology, leveraging spiking neural network architectures, promises sophisticated artificial intelligence with significantly reduced computational requirements and power consumption, emulating human brain operational principles. 
Despite documented neuromorphic technology applications in various robotic domains, its utilization in marine robotics remains largely unexplored. Thus, this article proposes a unified framework for integrating neuromorphic technologies for perception, pose estimation, and haptic-guided conditional control of underwater vehicles, customized to specific user-defined objectives. This conceptual framework stands to revolutionize underwater robotics, enhancing efficiency and autonomy while reducing energy consumption. By enabling greater adaptability and robustness, this advancement could facilitate applications such as underwater exploration, environmental monitoring, and infrastructure maintenance, thereby contributing to significant progress in marine science and technology. </p> </div> </dd> <dt> <a name='item178'>[178]</a> <a href ="/abs/2411.13975" title="Abstract" id="2411.13975"> arXiv:2411.13975 </a> [<a href="/pdf/2411.13975" title="Download PDF" id="pdf-2411.13975" aria-labelledby="pdf-2411.13975">pdf</a>, <a href="https://arxiv.org/html/2411.13975v1" title="View HTML" id="html-2411.13975" aria-labelledby="html-2411.13975" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13975" title="Other formats" id="oth-2411.13975" aria-labelledby="oth-2411.13975">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Transforming Static Images Using Generative Models for Video Salient Object Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cho,+S">Suhwan Cho</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+M">Minhyeok Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+J">Jungho Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+S">Sangyoun Lee</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision 
and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> In many video processing tasks, leveraging large-scale image datasets is a common strategy, as image data is more abundant and facilitates comprehensive knowledge transfer. A typical approach for simulating video from static images involves applying spatial transformations, such as affine transformations and spline warping, to create sequences that mimic temporal progression. However, in tasks like video salient object detection, where both appearance and motion cues are critical, these basic image-to-video techniques fail to produce realistic optical flows that capture the independent motion properties of each object. In this study, we show that image-to-video diffusion models can generate realistic transformations of static images while understanding the contextual relationships between image components. This ability allows the model to generate plausible optical flows, preserving semantic integrity while reflecting the independent motion of scene elements. By augmenting individual images in this way, we create large-scale image-flow pairs that significantly enhance model training. Our approach achieves state-of-the-art performance across all public benchmark datasets, outperforming existing approaches. 
</p> </div> </dd> <dt> <a name='item179'>[179]</a> <a href ="/abs/2411.13978" title="Abstract" id="2411.13978"> arXiv:2411.13978 </a> [<a href="/pdf/2411.13978" title="Download PDF" id="pdf-2411.13978" aria-labelledby="pdf-2411.13978">pdf</a>, <a href="https://arxiv.org/html/2411.13978v1" title="View HTML" id="html-2411.13978" aria-labelledby="html-2411.13978" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13978" title="Other formats" id="oth-2411.13978" aria-labelledby="oth-2411.13978">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Breadboarding the European Moon Rover System: discussion and results of the analogue field test campaign </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Luna,+C">Cristina Luna</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Egu%C3%ADluz,+A+G">Augusto Gómez Eguíluz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barrientos-D%C3%ADez,+J">Jorge Barrientos-Díez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Moreno,+A">Almudena Moreno</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guerra,+A">Alba Guerra</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Esquer,+M">Manuel Esquer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Seoane,+M+L">Marina L. 
Seoane</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kay,+S">Steven Kay</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cameron,+A">Angus Cameron</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cama%C3%B1es,+C">Carmen Camañes</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Haas,+P">Philipp Haas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Papantoniou,+V">Vassilios Papantoniou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wedler,+A">Armin Wedler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rebele,+B">Bernhard Rebele</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Reynolds,+J">Jennifer Reynolds</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Landgraf,+M">Markus Landgraf</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 6 pages, 5 figures, conference International Conference on Space Robotics </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> 2024 International Conference on Space Robotics (iSpaRo), Luxembourg, Luxembourg, 2024, pp. 145-150 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Earth and Planetary Astrophysics (astro-ph.EP); Instrumentation and Methods for Astrophysics (astro-ph.IM) </div> <p class='mathjax'> This document compiles results obtained from the test campaign of the European Moon Rover System (EMRS) project. The test campaign, conducted at the Planetary Exploration Lab of DLR in Wessling, aimed to understand the scope of the EMRS breadboard design, its strengths, and the benefits of the modular design. The discussion of test results is based on rover traversal analyses, robustness assessments, wheel deflection analyses, and the overall transportation cost of the rover. 
This not only enables the comparison of locomotion modes on lunar regolith but also facilitates critical decision-making in the design of future lunar missions. </p> </div> </dd> <dt> <a name='item180'>[180]</a> <a href ="/abs/2411.13979" title="Abstract" id="2411.13979"> arXiv:2411.13979 </a> [<a href="/pdf/2411.13979" title="Download PDF" id="pdf-2411.13979" aria-labelledby="pdf-2411.13979">pdf</a>, <a href="https://arxiv.org/html/2411.13979v1" title="View HTML" id="html-2411.13979" aria-labelledby="html-2411.13979" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13979" title="Other formats" id="oth-2411.13979" aria-labelledby="oth-2411.13979">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FedRAV: Hierarchically Federated Region-Learning for Traffic Object Classification of Autonomous Vehicles </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhai,+Y">Yijun Zhai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+P">Pengzhan Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+Y">Yuepeng He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qu,+F">Fang Qu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qin,+Z">Zhida Qin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiao,+X">Xianlong Jiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+G">Guiyan Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+S">Songtao Guo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Distributed, Parallel, and Cluster Computing (cs.DC)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The emerging federated learning enables distributed 
autonomous vehicles to train equipped deep learning models collaboratively without exposing their raw data, providing great potential for utilizing explosively growing autonomous driving data. However, considering the complicated traffic environments and driving scenarios, deploying federated learning for autonomous vehicles is inevitably challenged by non-independent and identically distributed (Non-IID) data of vehicles, which may lead to failed convergence and low training accuracy. In this paper, we propose a novel hierarchically Federated Region-learning framework of Autonomous Vehicles (FedRAV), a two-stage framework, which adaptively divides a large area containing vehicles into sub-regions based on the defined region-wise distance, and achieves personalized vehicular models and regional models. This approach ensures that the personalized vehicular model adopts the beneficial models while discarding the unprofitable ones. We validate our FedRAV framework against existing federated learning algorithms on three real-world autonomous driving datasets in various heterogeneous settings. The experiment results demonstrate that our framework outperforms those known algorithms, and improves the accuracy by at least 3.69%. The source code of FedRAV is available at: <a href="https://github.com/yjzhai-cs/FedRAV" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item181'>[181]</a> <a href ="/abs/2411.13981" title="Abstract" id="2411.13981"> arXiv:2411.13981 </a> [<a href="/pdf/2411.13981" title="Download PDF" id="pdf-2411.13981" aria-labelledby="pdf-2411.13981">pdf</a>, <a href="https://arxiv.org/html/2411.13981v1" title="View HTML" id="html-2411.13981" aria-labelledby="html-2411.13981" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13981" title="Other formats" id="oth-2411.13981" aria-labelledby="oth-2411.13981">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On the Fairness, Diversity and Reliability of Text-to-Image Generative Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Vice,+J">Jordan Vice</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Akhtar,+N">Naveed Akhtar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hartley,+R">Richard Hartley</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mian,+A">Ajmal Mian</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This research is supported by the NISDRG project #20100007, funded by the Australian Government </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The widespread availability of multimodal generative models has sparked critical discussions on their fairness, reliability, and potential for misuse. While text-to-image models can produce high-fidelity, user-guided images, they also exhibit unpredictable behavior and vulnerabilities, which can be exploited to manipulate class or concept representations. 
To address this, we propose an evaluation framework designed to assess model reliability through their responses to globally- and locally-applied `semantic' perturbations in the embedding space, pinpointing inputs that trigger unreliable behavior. Our approach offers deeper insights into two essential aspects: (i) generative diversity, evaluating the breadth of visual representations for learned concepts, and (ii) generative fairness, examining how removing concepts from input prompts affects semantic guidance. Beyond these evaluations, our method lays the groundwork for detecting unreliable, bias-injected models and retrieval of bias provenance. We will release our code. <br>Keywords: Fairness, Reliability, AI Ethics, Bias, Text-to-Image Models </p> </div> </dd> <dt> <a name='item182'>[182]</a> <a href ="/abs/2411.13982" title="Abstract" id="2411.13982"> arXiv:2411.13982 </a> [<a href="/pdf/2411.13982" title="Download PDF" id="pdf-2411.13982" aria-labelledby="pdf-2411.13982">pdf</a>, <a href="https://arxiv.org/html/2411.13982v1" title="View HTML" id="html-2411.13982" aria-labelledby="html-2411.13982" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13982" title="Other formats" id="oth-2411.13982" aria-labelledby="oth-2411.13982">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Safety Without Semantic Disruptions: Editing-free Safe Image Generation via Context-preserving Dual Latent Reconstruction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Vice,+J">Jordan Vice</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Akhtar,+N">Naveed Akhtar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hartley,+R">Richard Hartley</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mian,+A">Ajmal Mian</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This research is 
supported by the NISDRG project #20100007, funded by the Australian Government </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Training multimodal generative models on large, uncurated datasets can result in users being exposed to harmful, unsafe and controversial or culturally-inappropriate outputs. While model editing has been proposed to remove or filter undesirable concepts in embedding and latent spaces, it can inadvertently damage learned manifolds, distorting concepts in close semantic proximity. We identify limitations in current model editing techniques, showing that even benign, proximal concepts may become misaligned. To address the need for safe content generation, we propose a modular, dynamic solution that leverages safety-context embeddings and a dual reconstruction process using tunable weighted summation in the latent space to generate safer images. Our method preserves global context without compromising the structural integrity of the learned manifolds. We achieve state-of-the-art results on safe image generation benchmarks, while offering controllable variation of model safety. We identify trade-offs between safety and censorship, which presents a necessary perspective in the development of ethical AI models. We will release our code. 
<br>Keywords: Text-to-Image Models, Generative AI, Safety, Reliability, Model Editing </p> </div> </dd> <dt> <a name='item183'>[183]</a> <a href ="/abs/2411.13983" title="Abstract" id="2411.13983"> arXiv:2411.13983 </a> [<a href="/pdf/2411.13983" title="Download PDF" id="pdf-2411.13983" aria-labelledby="pdf-2411.13983">pdf</a>, <a href="https://arxiv.org/html/2411.13983v1" title="View HTML" id="html-2411.13983" aria-labelledby="html-2411.13983" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13983" title="Other formats" id="oth-2411.13983" aria-labelledby="oth-2411.13983">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning Two-agent Motion Planning Strategies from Generalized Nash Equilibrium for Model Predictive Control </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+H">Hansung Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+E+L">Edward L. Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lim,+C+S">Chang Seok Lim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Borrelli,+F">Francesco Borrelli</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to 2025 Learning for Dynamics and Control Conference (L4DC) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Multiagent Systems (cs.MA)</span>; Robotics (cs.RO); Systems and Control (eess.SY) </div> <p class='mathjax'> We introduce an Implicit Game-Theoretic MPC (IGT-MPC), a decentralized algorithm for two-agent motion planning that uses a learned value function that predicts the game-theoretic interaction outcomes as the terminal cost-to-go function in a model predictive control (MPC) framework, guiding agents to implicitly account for interactions with other agents and maximize their reward. 
This approach applies to competitive and cooperative multi-agent motion planning problems which we formulate as constrained dynamic games. Given a constrained dynamic game, we randomly sample initial conditions and solve for the generalized Nash equilibrium (GNE) to generate a dataset of GNE solutions, computing the reward outcome of each game-theoretic interaction from the GNE. The data is used to train a simple neural network to predict the reward outcome, which we use as the terminal cost-to-go function in an MPC scheme. We showcase emerging competitive and coordinated behaviors using IGT-MPC in scenarios such as two-vehicle head-to-head racing and un-signalized intersection navigation. IGT-MPC offers a novel method integrating machine learning and game-theoretic reasoning into model-based decentralized multi-agent motion planning. </p> </div> </dd> <dt> <a name='item184'>[184]</a> <a href ="/abs/2411.13985" title="Abstract" id="2411.13985"> arXiv:2411.13985 </a> [<a href="/pdf/2411.13985" title="Download PDF" id="pdf-2411.13985" aria-labelledby="pdf-2411.13985">pdf</a>, <a href="https://arxiv.org/html/2411.13985v1" title="View HTML" id="html-2411.13985" aria-labelledby="html-2411.13985" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13985" title="Other formats" id="oth-2411.13985" aria-labelledby="oth-2411.13985">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Representing Hypergraphs by Point-Line Incidences </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Dobler,+A">Alexander Dobler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kobourov,+S">Stephen Kobourov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mondal,+D">Debajyoti Mondal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=N%C3%B6llenburg,+M">Martin Nöllenburg</a></div> <div class='list-comments mathjax'><span 
class='descriptor'>Comments:</span> 21 pages, 11 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computational Geometry (cs.CG)</span> </div> <p class='mathjax'> We consider hypergraph visualizations that represent vertices as points in the plane and hyperedges as curves passing through the points of their incident vertices. Specifically, we consider several different variants of this problem by (a) restricting the curves to be lines or line segments, (b) allowing two curves to cross if they do not share an element, or not; and (c) allowing two curves to overlap or not. We show $\exists\mathbb{R}$-hardness for six of the eight resulting decision problem variants and describe polynomial-time algorithms in some restricted settings. Lastly, we briefly touch on what happens if we allow the lines of the represented hyperedges to have bends - to this we generalize a counterexample to a long-standing result that was sometimes assumed to be correct. 
</p> </div> </dd> <dt> <a name='item185'>[185]</a> <a href ="/abs/2411.13988" title="Abstract" id="2411.13988"> arXiv:2411.13988 </a> [<a href="/pdf/2411.13988" title="Download PDF" id="pdf-2411.13988" aria-labelledby="pdf-2411.13988">pdf</a>, <a href="https://arxiv.org/html/2411.13988v1" title="View HTML" id="html-2411.13988" aria-labelledby="html-2411.13988" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13988" title="Other formats" id="oth-2411.13988" aria-labelledby="oth-2411.13988">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Dehazing-aided Multi-Rate Multi-Modal Pose Estimation Framework for Mitigating Visual Disturbances in Extreme Underwater Domain </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sudevan,+V">Vidya Sudevan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zayer,+F">Fakhreddine Zayer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hassan,+T">Taimur Hassan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Javed,+S">Sajid Javed</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Karki,+H">Hamad Karki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=De+Masi,+G">Giulia De Masi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dias,+J">Jorge Dias</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> This paper delves into the potential of DU-VIO, a dehazing-aided hybrid multi-rate multi-modal Visual-Inertial Odometry (VIO) estimation framework, designed to thrive in the challenging realm of extreme underwater environments. 
The cutting-edge DU-VIO framework is incorporating a GAN-based pre-processing module and a hybrid CNN-LSTM module for precise pose estimation, using visibility-enhanced underwater images and raw IMU data. Accurate pose estimation is paramount for various underwater robotics and exploration applications. However, underwater visibility is often compromised by suspended particles and attenuation effects, rendering visual-inertial pose estimation a formidable challenge. DU-VIO aims to overcome these limitations by effectively removing visual disturbances from raw image data, enhancing the quality of image features used for pose estimation. We demonstrate the effectiveness of DU-VIO by calculating RMSE scores for translation and rotation vectors in comparison to their reference values. These scores are then compared to those of a base model using a modified AQUALOC Dataset. This study's significance lies in its potential to revolutionize underwater robotics and exploration. DU-VIO offers a robust solution to the persistent challenge of underwater visibility, significantly improving the accuracy of pose estimation. This research contributes valuable insights and tools for advancing underwater technology, with far-reaching implications for scientific research, environmental monitoring, and industrial applications. 
</p> </div> </dd> <dt> <a name='item186'>[186]</a> <a href ="/abs/2411.13989" title="Abstract" id="2411.13989"> arXiv:2411.13989 </a> [<a href="/pdf/2411.13989" title="Download PDF" id="pdf-2411.13989" aria-labelledby="pdf-2411.13989">pdf</a>, <a href="https://arxiv.org/html/2411.13989v1" title="View HTML" id="html-2411.13989" aria-labelledby="html-2411.13989" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13989" title="Other formats" id="oth-2411.13989" aria-labelledby="oth-2411.13989">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Smart Fronthauling Management: Experimental Insights from a 5G Testbed </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Morini,+M">Marcello Morini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Moro,+E">Eugenio Moro</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Filippini,+I">Ilario Filippini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=De+Donno,+D">Danilo De Donno</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Moscato,+S">Salvatore Moscato</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Capone,+A">Antonio Capone</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Networking and Internet Architecture (cs.NI)</span> </div> <p class='mathjax'> The fronthaul connection is a key component of Centralized RAN (C-RAN) architectures, consistently required to handle high capacity demands. However, this critical feature is at risk when the transport link relies on wireless technology. Fortunately, solutions exist to enhance the reliability of wireless links. In this paper, we recall the theoretical fronthaul model, present a dynamic reconfiguration strategy and perform a conclusive experiment. 
Specifically, we showcase the setup of a wireless fronthaul testbed and discuss the resulting measurements. For this task, we leveraged the commercial hardware provided by the High-Frequency Campus Lab (HFCL), a private 5G network with millimeter wave (mmWave) radio access interface. Our experiments provide original data on the fronthaul utilization in this real deployment, demonstrating both a good accordance with the theoretical model discussed in [1] and the viability of one stabilizing solution. </p> </div> </dd> <dt> <a name='item187'>[187]</a> <a href ="/abs/2411.13990" title="Abstract" id="2411.13990"> arXiv:2411.13990 </a> [<a href="/pdf/2411.13990" title="Download PDF" id="pdf-2411.13990" aria-labelledby="pdf-2411.13990">pdf</a>, <a href="https://arxiv.org/html/2411.13990v1" title="View HTML" id="html-2411.13990" aria-labelledby="html-2411.13990" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13990" title="Other formats" id="oth-2411.13990" aria-labelledby="oth-2411.13990">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Repository-level Code Translation Benchmark Targeting Rust </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ou,+G">Guangsheng Ou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+M">Mingwei Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yuxuan Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peng,+X">Xing Peng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+Z">Zibin Zheng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span> </div> <p class='mathjax'> Recent advances in large language models (LLMs) have shown significant capabilities in code translation, often evaluated using benchmarks like CodeTransOcean. 
However, these evaluations typically focus on simple, function-level translations without considering dependencies, which does not reflect the complexities of real-world software development. Further, their effectiveness in translating to newer, lower-resource languages like Rust in realistic scenarios is still under-explored. To address this gap, we introduce first repository-level code translation benchmark comprising 375 tasks targeting Rust, complete with relevant dependencies. Using this benchmark, we study four state-of-the-art LLMs, analyzing their erroneous outputs to understand their performance in more complex translation scenarios. Our findings reveal that LLMs exhibit substantially worse performance (41.5%-56.2% Pass@1 drop of GPT-4) on repository-level translations compared to simpler tasks, highlighting limitations in existing evaluation methods. The model that performed the best is Claude-3.5, demonstrating the strongest translation capabilities in both basic functionality accuracy and several relevant additional abilities. Additionally, we discover that LLMs struggle with identifying language differences in complex tasks, and that increased dependencies correlate with greater translation difficulty. 
</p> </div> </dd> <dt> <a name='item188'>[188]</a> <a href ="/abs/2411.13993" title="Abstract" id="2411.13993"> arXiv:2411.13993 </a> [<a href="/pdf/2411.13993" title="Download PDF" id="pdf-2411.13993" aria-labelledby="pdf-2411.13993">pdf</a>, <a href="https://arxiv.org/html/2411.13993v1" title="View HTML" id="html-2411.13993" aria-labelledby="html-2411.13993" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13993" title="Other formats" id="oth-2411.13993" aria-labelledby="oth-2411.13993">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Market Making without Regret </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cesa-Bianchi,+N">Nicolò Cesa-Bianchi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cesari,+T">Tommaso Cesari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Colomboni,+R">Roberto Colomboni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Foscari,+L">Luigi Foscari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pathak,+V">Vinayak Pathak</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Science and Game Theory (cs.GT)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> We consider a sequential decision-making setting where, at every round $t$, a market maker posts a bid price $B_t$ and an ask price $A_t$ to an incoming trader (the taker) with a private valuation for one unit of some asset. If the trader's valuation is lower than the bid price, or higher than the ask price, then a trade (sell or buy) occurs. If a trade happens at round $t$, then letting $M_t$ be the market price (observed only at the end of round $t$), the maker's utility is $M_t - B_t$ if the maker bought the asset, and $A_t - M_t$ if they sold it. 
We characterize the maker's regret with respect to the best fixed choice of bid and ask pairs under a variety of assumptions (adversarial, i.i.d., and their variants) on the sequence of market prices and valuations. Our upper bound analysis unveils an intriguing connection relating market making to first-price auctions and dynamic pricing. Our main technical contribution is a lower bound for the i.i.d. case with Lipschitz distributions and independence between prices and valuations. The difficulty in the analysis stems from the unique structure of the reward and feedback functions, allowing an algorithm to acquire information by graduating the "cost of exploration" in an arbitrary way. </p> </div> </dd> <dt> <a name='item189'>[189]</a> <a href ="/abs/2411.13994" title="Abstract" id="2411.13994"> arXiv:2411.13994 </a> [<a href="/pdf/2411.13994" title="Download PDF" id="pdf-2411.13994" aria-labelledby="pdf-2411.13994">pdf</a>, <a href="/format/2411.13994" title="Other formats" id="oth-2411.13994" aria-labelledby="oth-2411.13994">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Dual-Arm Telerobotic Platform for Robotic Hotbox Operations for Nuclear Waste Disposition in EM Sites </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+J">Joong-Ku Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Park,+Y+S">Young Soo Park</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This paper was submitted to Waste Management Symposia 2024 (WM2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> This paper introduces a dual-arm telerobotic platform designed to efficiently and safely execute hot cell operations for nuclear waste disposition at EM sites. 
The proposed system consists of a remote robot arm platform and a teleoperator station, both integrated with a software architecture to control the entire system. The dual-arm configuration of the remote platform enhances versatility and task performance in complex and hazardous environments, ensuring precise manipulation and effective handling of nuclear waste materials. The integration of a teleoperator station enables human teleoperator to remotely control the entire system real-time, enhancing decision-making capabilities, situational awareness, and dexterity. The control software plays a crucial role in our system, providing a robust and intuitive interface for the teleoperator. Test operation results demonstrate the system's effectiveness in operating as a remote hotbox for nuclear waste disposition, showcasing its potential applicability in real EM sites. </p> </div> </dd> <dt> <a name='item190'>[190]</a> <a href ="/abs/2411.13996" title="Abstract" id="2411.13996"> arXiv:2411.13996 </a> [<a href="/pdf/2411.13996" title="Download PDF" id="pdf-2411.13996" aria-labelledby="pdf-2411.13996">pdf</a>, <a href="/format/2411.13996" title="Other formats" id="oth-2411.13996" aria-labelledby="oth-2411.13996">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Contact Tooling Manipulation Control for Robotic Repair Platform </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+J">Joong-Ku Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Park,+Y+S">Young Soo Park</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This paper was submitted to Waste Management Symposia 2024 (WM2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> This paper delves into various robotic manipulation control methods designed for 
dynamic contact tooling operations on a robotic repair platform. The explored control strategies include hybrid position-force control, admittance control, bilateral telerobotic control, virtual fixture, and shared control. Each approach is elucidated and assessed in terms of its applicability and effectiveness for handling contact tooling tasks in real-world repair scenarios. The hybrid position-force controller is highlighted for its proficiency in executing precise force-required tasks, but it is contingent on an accurate model of the environment and a structured, static environment. In contrast, for unstructured environments, bilateral teleoperation control is investigated, revealing that the compliance with the remote robot controller is crucial for stable contact, albeit at the expense of reduced motion tracking performance. Moreover, advanced controllers for tooling manipulation tasks, such as virtual fixture and shared control approaches, are investigated for their potential applications. 
</p> </div> </dd> <dt> <a name='item191'>[191]</a> <a href ="/abs/2411.13997" title="Abstract" id="2411.13997"> arXiv:2411.13997 </a> [<a href="/pdf/2411.13997" title="Download PDF" id="pdf-2411.13997" aria-labelledby="pdf-2411.13997">pdf</a>, <a href="https://arxiv.org/html/2411.13997v1" title="View HTML" id="html-2411.13997" aria-labelledby="html-2411.13997" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13997" title="Other formats" id="oth-2411.13997" aria-labelledby="oth-2411.13997">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Mirror Target YOLO: An Improved YOLOv8 Method with Indirect Vision for Heritage Buildings Fire Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+J">Jian Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+J">JunSheng Cheng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Fires can cause severe damage to heritage buildings, making timely fire detection essential. Traditional dense cabling and drilling can harm these structures, so reducing the number of cameras to minimize such impact is challenging. Additionally, avoiding false alarms due to noise sensitivity and preserving the expertise of managers in fire-prone areas is crucial. To address these needs, we propose a fire detection method based on indirect vision, called Mirror Target YOLO (MITA-YOLO). MITA-YOLO integrates indirect vision deployment and an enhanced detection module. It uses mirror angles to achieve indirect views, solving issues with limited visibility in irregular spaces and aligning each indirect view with the target monitoring area. 
The Target-Mask module is designed to automatically identify and isolate the indirect vision areas in each image, filtering out non-target areas. This enables the model to inherit managers' expertise in assessing fire-risk zones, improving focus and resistance to interference in fire detection. In our experiments, we created an 800-image fire dataset with indirect vision. Results show that MITA-YOLO significantly reduces camera requirements while achieving superior detection performance compared to other mainstream models. </p> </div> </dd> <dt> <a name='item192'>[192]</a> <a href ="/abs/2411.14000" title="Abstract" id="2411.14000"> arXiv:2411.14000 </a> [<a href="/pdf/2411.14000" title="Download PDF" id="pdf-2411.14000" aria-labelledby="pdf-2411.14000">pdf</a>, <a href="https://arxiv.org/html/2411.14000v1" title="View HTML" id="html-2411.14000" aria-labelledby="html-2411.14000" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14000" title="Other formats" id="oth-2411.14000" aria-labelledby="oth-2411.14000">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Multi-Layer Blockchain Simulator and Performance Evaluation of Social Internet of Vehicles with Multi-Connectivity Management </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+Y">Yi-Ting Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+H">Hsin-Chieh Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+Y">Yun-Chen Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+T">Ting-Feng Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Althamary,+I">Ibrahim Althamary</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+C">Chih-Wei Huang</a></div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Networking and Internet Architecture (cs.NI)</span> </div> <p class='mathjax'> The evolution of vehicle-to-everything (V2X) communication brings significant challenges, such as data integrity and vulnerabilities stemming from centralized management. This paper presents an innovative integration of decentralized blockchain technology with V2X communication through a multi-layered architecture that combines the Simulation of Urban Mobility (SUMO) traffic simulator and the BlockSim blockchain simulator. In addition, as the Social Internet of Vehicles (SIoV) emerges, efficient resource management becomes indispensable for ensuring seamless communication. We also propose a reference multi-connectivity management method named Enhanced MAX-SINR, designed to advance research in blockchain-specific approaches, taking into account retransmission successful rates. We evaluate blockchain performance in diverse environments such as urban, suburban, and rural areas, demonstrating that enhancing the success rate of retransmitted blockchain-related messages significantly boosts blockchain transaction performance and provides a foundation for developing intelligent SIoV systems. 
</p> </div> </dd> <dt> <a name='item193'>[193]</a> <a href ="/abs/2411.14001" title="Abstract" id="2411.14001"> arXiv:2411.14001 </a> [<a href="/pdf/2411.14001" title="Download PDF" id="pdf-2411.14001" aria-labelledby="pdf-2411.14001">pdf</a>, <a href="https://arxiv.org/html/2411.14001v1" title="View HTML" id="html-2411.14001" aria-labelledby="html-2411.14001" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14001" title="Other formats" id="oth-2411.14001" aria-labelledby="oth-2411.14001">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Graph Domain Adaptation with Dual-branch Encoder and Two-level Alignment for Whole Slide Image-based Survival Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shou,+Y">Yuntao Shou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yan,+P">Peiqiang Yan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yuan,+X">Xingjian Yuan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+X">Xiangyong Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Q">Qian Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Meng,+D">Deyu Meng</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 12 pages, 6 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> In recent years, histopathological whole slide image (WSI)- based survival analysis has attracted much attention in medical image analysis. In practice, WSIs usually come from different hospitals or laboratories, which can be seen as different domains, and thus may have significant differences in imaging equipment, processing procedures, and sample sources. 
These differences generally result in large gaps in distribution between different WSI domains, and thus the survival analysis models trained on one domain may fail to transfer to another. To address this issue, we propose a Dual-branch Encoder and Two-level Alignment (DETA) framework to explore both feature and category-level alignment between different WSI domains. Specifically, we first formulate the concerned problem as graph domain adaptation (GDA) by virtue of the graph representation of WSIs. Then we construct a dual-branch graph encoder, including the message passing branch and the shortest path branch, to explicitly and implicitly extract semantic information from the graph-represented WSIs. To realize GDA, we propose a two-level alignment approach: at the category level, we develop a coupling technique by virtue of the dual-branch structure, leading to reduced divergence between the category distributions of the two domains; at the feature level, we introduce an adversarial perturbation strategy to better augment source domain feature, resulting in improved alignment in feature distribution. To the best of our knowledge, our work is the first attempt to alleviate the domain shift issue for WSI data analysis. Extensive experiments on four TCGA datasets have validated the effectiveness of our proposed DETA framework and demonstrated its superior performance in WSI-based survival analysis. 
</p> </div> </dd> <dt> <a name='item194'>[194]</a> <a href ="/abs/2411.14002" title="Abstract" id="2411.14002"> arXiv:2411.14002 </a> [<a href="/pdf/2411.14002" title="Download PDF" id="pdf-2411.14002" aria-labelledby="pdf-2411.14002">pdf</a>, <a href="https://arxiv.org/html/2411.14002v1" title="View HTML" id="html-2411.14002" aria-labelledby="html-2411.14002" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14002" title="Other formats" id="oth-2411.14002" aria-labelledby="oth-2411.14002">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SEMPose: A Single End-to-end Network for Multi-object Pose Estimation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+X">Xin Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Hao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xue,+S">Shibei Xue</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+D">Dezong Zhao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> In computer vision, estimating the six-degree-of-freedom pose from an RGB image is a fundamental task. However, this task becomes highly challenging in multi-object scenes. Currently, the best methods typically employ an indirect strategy, which identifies 2D and 3D correspondences, and then solves with the Perspective-n-Points method. Yet, this approach cannot be trained end-to-end. Direct methods, on the other hand, suffer from lower accuracy due to challenges such as varying object sizes and occlusions. To address these issues, we propose SEMPose, an end-to-end multi-object pose estimation network. 
SEMPose utilizes a well-designed texture-shape guided feature pyramid network, effectively tackling the challenge of object size variations. Additionally, it employs an iterative refinement head structure, progressively regressing rotation and translation separately to enhance estimation accuracy. During training, we alleviate the impact of occlusion by selecting positive samples from visible parts. Experimental results demonstrate that SEMPose can perform inference at 32 FPS without requiring inputs other than the RGB image. It can accurately estimate the poses of multiple objects in real time, with inference time unaffected by the number of target objects. On the LM-O and YCB-V datasets, our method outperforms other RGB-based single-model methods, achieving higher accuracy. Even when compared with multi-model methods and approaches that use additional refinement, our results remain competitive. </p> </div> </dd> <dt> <a name='item195'>[195]</a> <a href ="/abs/2411.14003" title="Abstract" id="2411.14003"> arXiv:2411.14003 </a> [<a href="/pdf/2411.14003" title="Download PDF" id="pdf-2411.14003" aria-labelledby="pdf-2411.14003">pdf</a>, <a href="https://arxiv.org/html/2411.14003v1" title="View HTML" id="html-2411.14003" aria-labelledby="html-2411.14003" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14003" title="Other formats" id="oth-2411.14003" aria-labelledby="oth-2411.14003">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Generative Intervention Models for Causal Perturbation Modeling </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Schneider,+N">Nora Schneider</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lorch,+L">Lars Lorch</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kilbertus,+N">Niki Kilbertus</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Sch%C3%B6lkopf,+B">Bernhard Schölkopf</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Krause,+A">Andreas Krause</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> We consider the problem of predicting perturbation effects via causal models. In many applications, it is a priori unknown which mechanisms of a system are modified by an external perturbation, even though the features of the perturbation are available. For example, in genomics, some properties of a drug may be known, but not their causal effects on the regulatory pathways of cells. We propose a generative intervention model (GIM) that learns to map these perturbation features to distributions over atomic interventions in a jointly-estimated causal model. Contrary to prior approaches, this enables us to predict the distribution shifts of unseen perturbation features while gaining insights about their mechanistic effects in the underlying data-generating process. On synthetic data and scRNA-seq drug perturbation data, GIMs achieve robust out-of-distribution predictions on par with unstructured approaches, while effectively inferring the underlying perturbation mechanisms, often better than other causal inference methods. 
</p> </div> </dd> <dt> <a name='item196'>[196]</a> <a href ="/abs/2411.14006" title="Abstract" id="2411.14006"> arXiv:2411.14006 </a> [<a href="/pdf/2411.14006" title="Download PDF" id="pdf-2411.14006" aria-labelledby="pdf-2411.14006">pdf</a>, <a href="https://arxiv.org/html/2411.14006v1" title="View HTML" id="html-2411.14006" aria-labelledby="html-2411.14006" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14006" title="Other formats" id="oth-2411.14006" aria-labelledby="oth-2411.14006">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Experimental comparison of graph-based approximate nearest neighbor search algorithms on edge devices </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ganbarov,+A">Ali Ganbarov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yuan,+J">Jicheng Yuan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Le-Tuan,+A">Anh Le-Tuan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hauswirth,+M">Manfred Hauswirth</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Le-Phuoc,+D">Danh Le-Phuoc</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Data Structures and Algorithms (cs.DS)</span>; Hardware Architecture (cs.AR); Computer Vision and Pattern Recognition (cs.CV); Distributed, Parallel, and Cluster Computing (cs.DC); Performance (cs.PF) </div> <p class='mathjax'> In this paper, we present an experimental comparison of various graph-based approximate nearest neighbor (ANN) search algorithms deployed on edge devices for real-time nearest neighbor search applications, such as smart city infrastructure and autonomous vehicles. To the best of our knowledge, this specific comparative analysis has not been previously conducted. 
While existing research has explored graph-based ANN algorithms, it has often been limited to single-threaded implementations on standard commodity hardware. Our study leverages the full computational and storage capabilities of edge devices, incorporating additional metrics such as insertion and deletion latency of new vectors and power consumption. This comprehensive evaluation aims to provide valuable insights into the performance and suitability of these algorithms for edge-based real-time tracking systems enhanced by nearest-neighbor search algorithms. </p> </div> </dd> <dt> <a name='item197'>[197]</a> <a href ="/abs/2411.14007" title="Abstract" id="2411.14007"> arXiv:2411.14007 </a> [<a href="/pdf/2411.14007" title="Download PDF" id="pdf-2411.14007" aria-labelledby="pdf-2411.14007">pdf</a>, <a href="https://arxiv.org/html/2411.14007v1" title="View HTML" id="html-2411.14007" aria-labelledby="html-2411.14007" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14007" title="Other formats" id="oth-2411.14007" aria-labelledby="oth-2411.14007">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Approximating One-Sided and Two-Sided Nash Social Welfare With Capacities </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gokhale,+S">Salil Gokhale</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sagar,+H">Harshul Sagar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vaish,+R">Rohit Vaish</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yadav,+J">Jatin Yadav</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 28 pages, 1 figure </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Science and Game Theory (cs.GT)</span> </div> <p class='mathjax'> We study the problem of maximizing Nash social welfare, 
which is the geometric mean of agents' utilities, in two well-known models. The first model involves one-sided preferences, where a set of indivisible items is allocated among a group of agents (commonly studied in fair division). The second model deals with two-sided preferences, where a set of workers and firms, each having numerical valuations for the other side, are matched with each other (commonly studied in matching-under-preferences literature). We study these models under capacity constraints, which restrict the number of items (respectively, workers) that an agent (respectively, a firm) can receive. <br>We develop constant-factor approximation algorithms for both problems under a broad class of valuations. Specifically, our main results are the following: (a) For any $\epsilon > 0$, a $(6+\epsilon)$-approximation algorithm for the one-sided problem when agents have submodular valuations, and (b) a $1.33$-approximation algorithm for the two-sided problem when the firms have subadditive valuations. The former result provides the first constant-factor approximation algorithm for Nash welfare in the one-sided problem with submodular valuations and capacities, while the latter result improves upon an existing $\sqrt{OPT}$-approximation algorithm for additive valuations. Our result for the two-sided setting also establishes a computational separation between the Nash and utilitarian welfare objectives. We also complement our algorithms with hardness-of-approximation results. 
</p> </div> </dd> <dt> <a name='item198'>[198]</a> <a href ="/abs/2411.14008" title="Abstract" id="2411.14008"> arXiv:2411.14008 </a> [<a href="/pdf/2411.14008" title="Download PDF" id="pdf-2411.14008" aria-labelledby="pdf-2411.14008">pdf</a>, <a href="https://arxiv.org/html/2411.14008v1" title="View HTML" id="html-2411.14008" aria-labelledby="html-2411.14008" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14008" title="Other formats" id="oth-2411.14008" aria-labelledby="oth-2411.14008">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Simulated real-world upper-body Exoskeleton Accident and Investigation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Winfield,+A">Alan Winfield</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Webb,+N">Nicola Webb</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Etoundi,+A">Appolinaire Etoundi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Derval,+R">Romain Derval</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Salvini,+P">Pericle Salvini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jirotka,+M">Marina Jirotka</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> In proc. 9th International Conference on Robot Ethics and Standards (ICRES 2024), Yokohama, Japan, 29-31 July 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> This paper describes the enactment of a simulated (mock) accident involving an upper-body exoskeleton and its investigation. The accident scenario is enacted by role-playing volunteers, one of whom is wearing the exoskeleton. 
Following the mock accident, investigators - also volunteers - interview both the subject of the accident and relevant witnesses. The investigators then consider the witness testimony alongside robot data logged by the ethical black box, in order to address the three key questions: what happened?, why did it happen?, and how can we make changes to prevent the accident happening again? This simulated accident scenario is one of a series we have run as part of the RoboTIPS project, with the overall aim of developing and testing both processes and technologies to support social robot accident investigation. </p> </div> </dd> <dt> <a name='item199'>[199]</a> <a href ="/abs/2411.14009" title="Abstract" id="2411.14009"> arXiv:2411.14009 </a> [<a href="/pdf/2411.14009" title="Download PDF" id="pdf-2411.14009" aria-labelledby="pdf-2411.14009">pdf</a>, <a href="/format/2411.14009" title="Other formats" id="oth-2411.14009" aria-labelledby="oth-2411.14009">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> GPT versus Humans: Uncovering Ethical Concerns in Conversational Generative AI-empowered Multi-Robot Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Rousi,+R">Rebekah Rousi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Makitalo,+N">Niko Makitalo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Samani,+H">Hooman Samani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kemell,+K">Kai-Kristian Kemell</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=de+Cerqueira,+J+S">Jose Siqueira de Cerqueira</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vakkuri,+V">Ville Vakkuri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mikkonen,+T">Tommi Mikkonen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Abrahamsson,+P">Pekka Abrahamsson</a></div> <div 
class='list-comments mathjax'><span class='descriptor'>Comments:</span> 51 pages, 10 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Human-Computer Interaction (cs.HC); Multiagent Systems (cs.MA) </div> <p class='mathjax'> The emergence of generative artificial intelligence (GAI) and large language models (LLMs) such as ChatGPT has enabled the realization of long-harbored desires in software and robotic development. The technology, however, has brought with it novel ethical challenges. These challenges are compounded by the application of LLMs in other machine learning systems, such as multi-robot systems. The objectives of the study were to examine novel ethical issues arising from the application of LLMs in multi-robot systems. Unfolding ethical issues in GPT agent behavior (deliberation of ethical concerns) was observed, and GPT output was compared with human experts. The article also advances a model for ethical development of multi-robot systems. A qualitative workshop-based method was employed in three workshops for the collection of ethical concerns: two human expert workshops (N=16 participants) and one GPT-agent-based workshop (N=7 agents; two teams of 6 agents plus one judge). Thematic analysis was used to analyze the qualitative data. The results reveal differences between the human-produced and GPT-based ethical concerns. Human experts placed greater emphasis on new themes related to deviance, data privacy, bias and unethical corporate conduct. GPT agents emphasized concerns present in existing AI ethics guidelines. The study contributes to a growing body of knowledge in context-specific AI ethics and GPT application. It demonstrates the gap between human expert thinking and LLM output, while emphasizing new ethical concerns emerging in novel technology. 
</p> </div> </dd> <dt> <a name='item200'>[200]</a> <a href ="/abs/2411.14012" title="Abstract" id="2411.14012"> arXiv:2411.14012 </a> [<a href="/pdf/2411.14012" title="Download PDF" id="pdf-2411.14012" aria-labelledby="pdf-2411.14012">pdf</a>, <a href="https://arxiv.org/html/2411.14012v1" title="View HTML" id="html-2411.14012" aria-labelledby="html-2411.14012" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14012" title="Other formats" id="oth-2411.14012" aria-labelledby="oth-2411.14012">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Logic Augmented Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gangemi,+A">Aldo Gangemi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nuzzolese,+A+G">Andrea Giovanni Nuzzolese</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages, 2 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> Semantic Knowledge Graphs (SKG) face challenges with scalability, flexibility, contextual understanding, and handling unstructured or ambiguous information. However, they offer formal and structured knowledge enabling highly interpretable and reliable results by means of reasoning and querying. Large Language Models (LLMs) overcome those limitations making them suitable in open-ended tasks and unstructured environments. Nevertheless, LLMs are neither interpretable nor reliable. To solve the dichotomy between LLMs and SKGs we envision Logic Augmented Generation (LAG) that combines the benefits of the two worlds. LAG uses LLMs as Reactive Continuous Knowledge Graphs that can generate potentially infinite relations and tacit knowledge on-demand. 
SKGs are key for injecting a discrete heuristic dimension with clear logical and factual boundaries. We exemplify LAG in two tasks of collective intelligence, i.e., medical diagnostics and climate projections. Understanding the properties and limitations of LAG, which are still mostly unknown, is of utmost importance for enabling a variety of tasks involving tacit knowledge in order to provide interpretable and effective results. </p> </div> </dd> <dt> <a name='item201'>[201]</a> <a href ="/abs/2411.14014" title="Abstract" id="2411.14014"> arXiv:2411.14014 </a> [<a href="/pdf/2411.14014" title="Download PDF" id="pdf-2411.14014" aria-labelledby="pdf-2411.14014">pdf</a>, <a href="https://arxiv.org/html/2411.14014v1" title="View HTML" id="html-2411.14014" aria-labelledby="html-2411.14014" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14014" title="Other formats" id="oth-2411.14014" aria-labelledby="oth-2411.14014">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Trajectory Representation Learning on Road Networks and Grids with Spatio-Temporal Dynamics </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Schestakov,+S">Stefan Schestakov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gottschalk,+S">Simon Gottschalk</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Trajectory representation learning is a fundamental task for applications in fields including smart city, and urban planning, as it facilitates the utilization of trajectory data (e.g., vehicle movements) for various downstream applications, such as trajectory similarity computation or travel time estimation. 
This is achieved by learning low-dimensional representations from high-dimensional and raw trajectory data. However, existing methods for trajectory representation learning either rely on grid-based or road-based representations, which are inherently different and thus, could lose information contained in the other modality. Moreover, these methods overlook the dynamic nature of urban traffic, relying on static road network features rather than time-varying traffic patterns. In this paper, we propose TIGR, a novel model designed to integrate grid and road network modalities while incorporating spatio-temporal dynamics to learn rich, general-purpose representations of trajectories. We evaluate TIGR on two real-world datasets and demonstrate the effectiveness of combining both modalities by substantially outperforming state-of-the-art methods, i.e., up to 43.22% for trajectory similarity, up to 16.65% for travel time estimation, and up to 10.16% for destination prediction. </p> </div> </dd> <dt> <a name='item202'>[202]</a> <a href ="/abs/2411.14019" title="Abstract" id="2411.14019"> arXiv:2411.14019 </a> [<a href="/pdf/2411.14019" title="Download PDF" id="pdf-2411.14019" aria-labelledby="pdf-2411.14019">pdf</a>, <a href="https://arxiv.org/html/2411.14019v1" title="View HTML" id="html-2411.14019" aria-labelledby="html-2411.14019" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14019" title="Other formats" id="oth-2411.14019" aria-labelledby="oth-2411.14019">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Time-Scale Separation in Q-Learning: Extending TD($\triangle$) for Action-Value Function Decomposition </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Humayoo,+M">Mahammad Humayoo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 17 pages </div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> Q-Learning is a fundamental off-policy reinforcement learning (RL) algorithm that has the objective of approximating action-value functions in order to learn optimal policies. Nonetheless, it has difficulties in reconciling bias with variance, particularly in the context of long-term rewards. This paper introduces Q($\Delta$)-Learning, an extension of TD($\Delta$) for the Q-Learning framework. TD($\Delta$) facilitates efficient learning over several time scales by breaking the Q($\Delta$)-function into distinct discount factors. This approach offers improved learning stability and scalability, especially for long-term tasks where discounting bias may impede convergence. Our methodology guarantees that each element of the Q($\Delta$)-function is acquired individually, facilitating expedited convergence on shorter time scales and enhancing the learning of extended time scales. We demonstrate through theoretical analysis and practical evaluations on standard benchmarks like Atari that Q($\Delta$)-Learning surpasses conventional Q-Learning and TD learning methods in both tabular and deep RL environments. 
</p> </div> </dd> <dt> <a name='item203'>[203]</a> <a href ="/abs/2411.14025" title="Abstract" id="2411.14025"> arXiv:2411.14025 </a> [<a href="/pdf/2411.14025" title="Download PDF" id="pdf-2411.14025" aria-labelledby="pdf-2411.14025">pdf</a>, <a href="https://arxiv.org/html/2411.14025v1" title="View HTML" id="html-2411.14025" aria-labelledby="html-2411.14025" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14025" title="Other formats" id="oth-2411.14025" aria-labelledby="oth-2411.14025">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> RISecure-PUF: Multipurpose PUF-Driven Security Extensions with Lookaside Buffer in RISC-V </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+C">Chenghao Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+X">Xiaolin Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qin,+K">Kailun Qin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+T">Tengfei Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+Y">Yipeng Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+T">Tianyi Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+C">Chi Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gu,+D">Dawu Gu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span> </div> <p class='mathjax'> RISC-V's limited security features hinder its use in confidential computing and heterogeneous platforms. This paper introduces RISecure-PUF, a security extension utilizing existing Physical Unclonable Functions for key generation and secure protocol purposes. 
A one-way hash function is integrated to ensure provable security against modeling attacks, while a lookaside buffer accelerates batch sampling and minimizes reliance on error correction codes. Implemented on the Genesys 2 FPGA, RISecure-PUF improves at least $2.72\times$ in batch scenarios with negligible hardware overhead and a maximum performance reduction of $10.7\%$, enabled by reusing the hash function module in integrated environments such as cryptographic engines. </p> </div> </dd> <dt> <a name='item204'>[204]</a> <a href ="/abs/2411.14029" title="Abstract" id="2411.14029"> arXiv:2411.14029 </a> [<a href="/pdf/2411.14029" title="Download PDF" id="pdf-2411.14029" aria-labelledby="pdf-2411.14029">pdf</a>, <a href="https://arxiv.org/html/2411.14029v1" title="View HTML" id="html-2411.14029" aria-labelledby="html-2411.14029" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14029" title="Other formats" id="oth-2411.14029" aria-labelledby="oth-2411.14029">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Relation-aware based Siamese Denoising Autoencoder for Malware Few-shot Classification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+J">Jinting Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jang-Jaccard,+J">Julian Jang-Jaccard</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Welch,+I">Ian Welch</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=AI-Sahaf,+H">Harith AI-Sahaf</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Camtepe,+S">Seyit Camtepe</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dunmore,+A">Aeryn Dunmore</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lab,+C">Cybersecurity Lab</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and 
Security (cs.CR)</span> </div> <p class='mathjax'> When malware employs an unseen zero-day exploit, traditional security measures such as vulnerability scanners and antivirus software can fail to detect them. This is because these tools rely on known patches and signatures, which do not exist for new zero-day attacks. Furthermore, existing machine learning methods, which are trained on specific and occasionally outdated malware samples, may struggle to adapt to features in new malware. To address this issue, there is a need for a more robust machine learning model that can identify relationships between malware samples without being trained on a particular malware feature set. This is particularly crucial in the field of cybersecurity, where the number of malware samples is limited and obfuscation techniques are widely used. Current approaches using stacked autoencoders aim to remove the noise introduced by obfuscation techniques through reconstruction of the input. However, this approach ignores the semantic relationships between features across different malware samples. To overcome this limitation, we propose a novel Siamese Neural Network (SNN) that uses relation-aware embeddings to calculate more accurate similarity probabilities based on semantic details of different malware samples. In addition, by using entropy images as inputs, our model can extract better structural information and subtle differences in malware signatures, even in the presence of obfuscation techniques. Evaluations on two large malware sample sets using the N-shot and N-way methods show that our proposed model is highly effective in predicting previously unseen malware, even in the presence of obfuscation techniques. 
</p> </div> </dd> <dt> <a name='item205'>[205]</a> <a href ="/abs/2411.14030" title="Abstract" id="2411.14030"> arXiv:2411.14030 </a> [<a href="/pdf/2411.14030" title="Download PDF" id="pdf-2411.14030" aria-labelledby="pdf-2411.14030">pdf</a>, <a href="https://arxiv.org/html/2411.14030v1" title="View HTML" id="html-2411.14030" aria-labelledby="html-2411.14030" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14030" title="Other formats" id="oth-2411.14030" aria-labelledby="oth-2411.14030">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Performance Analysis of STAR-RIS-Assisted Cell-Free Massive MIMO Systems with Electromagnetic Interference and Phase Errors </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Qian,+J">Jun Qian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Murch,+R">Ross Murch</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Letaief,+K+B">Khaled B. Letaief</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 6 figures. This work has been submitted to the IEEE for possible publication </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> Simultaneous Transmitting and Reflecting Reconfigurable Intelligent Surfaces (STAR-RISs) are being explored for the next generation of sixth-generation (6G) networks. A promising configuration for their deployment is within cell-free massive multiple-input multiple-output (MIMO) systems. However, despite the advantages that STAR-RISs could bring, challenges such as electromagnetic interference (EMI) and phase errors may lead to significant performance degradation. 
In this paper, we investigate the impact of EMI and phase errors on STAR-RIS-assisted cell-free massive MIMO systems and propose techniques to mitigate these effects. We introduce a novel projected gradient descent (GD) algorithm for STAR-RIS coefficient matrix design by minimizing the local channel estimation normalised mean square error. We also derive the closed-form expressions of the uplink and downlink spectral efficiency (SE) to analyze system performance with EMI and phase errors, in which fractional power control methods are applied for performance improvement. The results reveal that the projected GD algorithm can effectively tackle EMI and phase errors to improve estimation accuracy and compensate for performance degradation with nearly $10\%\sim20\%$ SE improvement. Moreover, increasing access points (APs), antennas per AP, and STAR-RIS elements can also improve SE performance. Applying STAR-RIS in the proposed system achieves a larger $25\%$-likely SE than conventional RISs. However, the advantages of employing more STAR-RIS elements are reduced when EMI is severe. 
</p> </div> </dd> <dt> <a name='item206'>[206]</a> <a href ="/abs/2411.14033" title="Abstract" id="2411.14033"> arXiv:2411.14033 </a> [<a href="/pdf/2411.14033" title="Download PDF" id="pdf-2411.14033" aria-labelledby="pdf-2411.14033">pdf</a>, <a href="https://arxiv.org/html/2411.14033v1" title="View HTML" id="html-2411.14033" aria-labelledby="html-2411.14033" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14033" title="Other formats" id="oth-2411.14033" aria-labelledby="oth-2411.14033">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi-LLM-Agent Systems: Techniques and Business Perspectives </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Y">Yingxuan Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peng,+Q">Qiuying Peng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+J">Jun Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+W">Weinan Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span> </div> <p class='mathjax'> In the era of (multi-modal) large language models, most operational processes can be reformulated and reproduced using LLM agents. The LLM agents can perceive, control, and get feedback from the environment so as to accomplish the given tasks in an autonomous manner. Besides the environment-interaction property, the LLM agents can call various external tools to ease the task completion process. The tools can be regarded as a predefined operational process with private or real-time knowledge that does not exist in the parameters of LLMs. As a natural trend of development, the tools for calling are becoming autonomous agents, thus the full intelligent system turns out to be a multi-LLM-agent system (MLAS). 
This paper discusses the technical and business landscapes of MLAS. Compared to the previous single-LLM-agent system, an MLAS has the advantages of i) higher potential of task-solving performance, ii) higher flexibility for system changing, iii) proprietary data preserving for each participating entity, and iv) feasibility of monetization for each entity. To support the ecosystem of MLAS, we provide a preliminary version of such an MLAS protocol considering technical requirements, data privacy, and business incentives. As such, MLAS would be a practical solution to achieve artificial collective intelligence in the near future. </p> </div> </dd> <dt> <a name='item207'>[207]</a> <a href ="/abs/2411.14035" title="Abstract" id="2411.14035"> arXiv:2411.14035 </a> [<a href="/pdf/2411.14035" title="Download PDF" id="pdf-2411.14035" aria-labelledby="pdf-2411.14035">pdf</a>, <a href="https://arxiv.org/html/2411.14035v1" title="View HTML" id="html-2411.14035" aria-labelledby="html-2411.14035" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14035" title="Other formats" id="oth-2411.14035" aria-labelledby="oth-2411.14035">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Teaching MLPs to Master Heterogeneous Graph-Structured Knowledge for Efficient and Accurate Inference </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yunhui Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+X">Xinyi Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+T">Tieke He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+J">Jianhua Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+H">Hongzhi Yin</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Heterogeneous 
Graph Neural Networks (HGNNs) have achieved promising results in various heterogeneous graph learning tasks, owing to their superiority in capturing the intricate relationships and diverse relational semantics inherent in heterogeneous graph structures. However, the neighborhood-fetching latency incurred by structure dependency in HGNNs makes it challenging to deploy for latency-constrained applications that require fast inference. Inspired by recent GNN-to-MLP knowledge distillation frameworks, we introduce HG2M and HG2M+ to combine both HGNN's superior performance and MLP's efficient inference. HG2M directly trains student MLPs with node features as input and soft labels from teacher HGNNs as targets, and HG2M+ further distills reliable and heterogeneous semantic knowledge into student MLPs through reliable node distillation and reliable meta-path distillation. Experiments conducted on six heterogeneous graph datasets show that despite lacking structural dependencies, HG2Ms can still achieve competitive or even better performance than HGNNs and significantly outperform vanilla MLPs. Moreover, HG2Ms demonstrate a 379.24$\times$ speedup in inference over HGNNs on the large-scale IGB-3M-19 dataset, showcasing their ability for latency-sensitive deployments. 
</p> </div> </dd> <dt> <a name='item208'>[208]</a> <a href ="/abs/2411.14038" title="Abstract" id="2411.14038"> arXiv:2411.14038 </a> [<a href="/pdf/2411.14038" title="Download PDF" id="pdf-2411.14038" aria-labelledby="pdf-2411.14038">pdf</a>, <a href="https://arxiv.org/html/2411.14038v1" title="View HTML" id="html-2411.14038" aria-labelledby="html-2411.14038" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14038" title="Other formats" id="oth-2411.14038" aria-labelledby="oth-2411.14038">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Minimum Monotone Spanning Trees </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Di+Giacomo,+E">Emilio Di Giacomo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Didimo,+W">Walter Didimo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Katsanou,+E">Eleni Katsanou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schlipf,+L">Lena Schlipf</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Symvonis,+A">Antonios Symvonis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wolff,+A">Alexander Wolff</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> To appear in Proc. 50th International Conference on Current Trends in Theory and Practice of Computer Science (SOFSEM 2025) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computational Geometry (cs.CG)</span> </div> <p class='mathjax'> Computing a Euclidean minimum spanning tree of a set of points is a seminal problem in computational geometry and geometric graph theory. We combine it with another classical problem in graph drawing, namely computing a monotone geometric representation of a given graph. 
More formally, given a finite set $S$ of points in the plane and a finite set $\cal D$ of directions, a geometric spanning tree $T$ with vertex set $S$ is ${\cal D}$-monotone if, for every pair $\{u,v\}$ of vertices of $T$, there exists a direction $d \in \cal D$ for which the unique path from $u$ to $v$ in $T$ is monotone with respect to $d$. We provide a characterization of ${\cal D}$-monotone spanning trees. Based on it, we show that a ${\cal D}$-monotone spanning tree of minimum length can be computed in polynomial time if the number $k=|{\cal D}|$ of directions is fixed, both when (i) the set ${\cal D}$ of directions is prescribed and when (ii) the objective is to find a minimum-length ${\cal D}$-monotone spanning tree over all sets ${\cal D}$ of $k$ directions. For $k = 2$, we describe algorithms that are much faster than those for the general case. Furthermore, in contrast to the classical Euclidean minimum spanning tree, whose vertex degree is at most six, we show that for every even integer $k$, there exists a point set $S_k$ and a set $\cal D$ of $k$ directions such that any minimum-length $\cal D$-monotone spanning tree of $S_k$ has maximum vertex degree $2k$. 
</p> </div> </dd> <dt> <a name='item209'>[209]</a> <a href ="/abs/2411.14039" title="Abstract" id="2411.14039"> arXiv:2411.14039 </a> [<a href="/pdf/2411.14039" title="Download PDF" id="pdf-2411.14039" aria-labelledby="pdf-2411.14039">pdf</a>, <a href="https://arxiv.org/html/2411.14039v1" title="View HTML" id="html-2411.14039" aria-labelledby="html-2411.14039" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14039" title="Other formats" id="oth-2411.14039" aria-labelledby="oth-2411.14039">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Uterine Ultrasound Image Captioning Using Deep Learning Techniques </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Boulesnane,+A">Abdennour Boulesnane</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mokhtari,+B">Boutheina Mokhtari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Segueni,+O+R">Oumnia Rana Segueni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Segueni,+S">Slimane Segueni</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Medical imaging has significantly revolutionized medical diagnostics and treatment planning, progressing from early X-ray usage to sophisticated methods like MRIs, CT scans, and ultrasounds. This paper investigates the use of deep learning for medical image captioning, with a particular focus on uterine ultrasound images. These images are vital in obstetrics and gynecology for diagnosing and monitoring various conditions across different age groups. However, their interpretation is often challenging due to their complexity and variability. 
To address this, a deep learning-based medical image captioning system was developed, integrating Convolutional Neural Networks with a Bidirectional Gated Recurrent Unit network. This hybrid model processes both image and text features to generate descriptive captions for uterine ultrasound images. Our experimental results demonstrate the effectiveness of this approach over baseline methods, with the proposed model achieving superior performance in generating accurate and informative captions, as indicated by higher BLEU and ROUGE scores. By enhancing the interpretation of uterine ultrasound images, our research aims to assist medical professionals in making timely and accurate diagnoses, ultimately contributing to improved patient care. </p> </div> </dd> <dt> <a name='item210'>[210]</a> <a href ="/abs/2411.14042" title="Abstract" id="2411.14042"> arXiv:2411.14042 </a> [<a href="/pdf/2411.14042" title="Download PDF" id="pdf-2411.14042" aria-labelledby="pdf-2411.14042">pdf</a>, <a href="https://arxiv.org/html/2411.14042v1" title="View HTML" id="html-2411.14042" aria-labelledby="html-2411.14042" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14042" title="Other formats" id="oth-2411.14042" aria-labelledby="oth-2411.14042">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Forecasting Future International Events: A Reliable Dataset for Text-Based Event Modeling </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gwak,+D">Daehoon Gwak</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Park,+J">Junwoo Park</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Park,+M">Minho Park</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Park,+C">Chaehun Park</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+H">Hyunchan Lee</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Choi,+E">Edward Choi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Choo,+J">Jaegul Choo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> EMNLP 2024 Findings </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Predicting future international events from textual information, such as news articles, has tremendous potential for applications in global policy, strategic decision-making, and geopolitics. However, existing datasets available for this task are often limited in quality, hindering the progress of related research. In this paper, we introduce WORLDREP (WORLD Relationship and Event Prediction), a novel dataset designed to address these limitations by leveraging the advanced reasoning capabilities of large-language models (LLMs). Our dataset features high-quality scoring labels generated through advanced prompt modeling and rigorously validated by domain experts in political science. We showcase the quality and utility of WORLDREP for real-world event prediction tasks, demonstrating its effectiveness through extensive experiments and analysis. Furthermore, we publicly release our dataset along with the full automation source code for data collection, labeling, and benchmarking, aiming to support and advance research in text-based event prediction. 
</p> </div> </dd> <dt> <a name='item211'>[211]</a> <a href ="/abs/2411.14046" title="Abstract" id="2411.14046"> arXiv:2411.14046 </a> [<a href="/pdf/2411.14046" title="Download PDF" id="pdf-2411.14046" aria-labelledby="pdf-2411.14046">pdf</a>, <a href="https://arxiv.org/html/2411.14046v1" title="View HTML" id="html-2411.14046" aria-labelledby="html-2411.14046" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14046" title="Other formats" id="oth-2411.14046" aria-labelledby="oth-2411.14046">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> REFOL: Resource-Efficient Federated Online Learning for Traffic Flow Forecasting </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Q">Qingxiang Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+S">Sheng Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+Y">Yuxuan Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+X">Xiaolong Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+M">Min Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bilal,+M">Muhammad Bilal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yuwei Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xujing Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+Y">Yu Zheng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Multiple federated learning (FL) methods are proposed for traffic flow forecasting (TFF) to avoid heavy-transmission and privacy-leaking concerns resulting from the disclosure of raw data in centralized methods. 
However, these FL methods adopt offline learning, which may yield subpar performance when concept drift occurs, i.e., distributions of historical and future data vary. Online learning can detect concept drift during model training and is thus more applicable to TFF. Nevertheless, the existing federated online learning method for TFF fails to efficiently solve the concept drift problem and causes tremendous computing and communication overhead. Therefore, we propose a novel method named Resource-Efficient Federated Online Learning (REFOL) for TFF, which guarantees prediction performance in a communication-lightweight and computation-efficient way. Specifically, we design a data-driven client participation mechanism to detect the occurrence of concept drift and determine clients' participation necessity. Subsequently, we propose an adaptive online optimization strategy, which guarantees prediction performance and meanwhile avoids meaningless model updates. Then, a graph convolution-based model aggregation mechanism is designed, aiming to assess participants' contribution based on spatial correlation without importing extra communication and computing consumption on clients. Finally, we conduct extensive experiments on real-world datasets to demonstrate the superiority of REFOL in terms of prediction improvement and resource economization. 
</p> </div> </dd> <dt> <a name='item212'>[212]</a> <a href ="/abs/2411.14049" title="Abstract" id="2411.14049"> arXiv:2411.14049 </a> [<a href="/pdf/2411.14049" title="Download PDF" id="pdf-2411.14049" aria-labelledby="pdf-2411.14049">pdf</a>, <a href="https://arxiv.org/html/2411.14049v1" title="View HTML" id="html-2411.14049" aria-labelledby="html-2411.14049" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14049" title="Other formats" id="oth-2411.14049" aria-labelledby="oth-2411.14049">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Out-Of-Distribution Detection with Diversification (Provably) </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yao,+H">Haiyun Yao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Han,+Z">Zongbo Han</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+H">Huazhu Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peng,+X">Xi Peng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+Q">Qinghua Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+C">Changqing Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Out-of-distribution (OOD) detection is crucial for ensuring reliable deployment of machine learning models. Recent advancements focus on utilizing easily accessible auxiliary outliers (e.g., data from the web or other datasets) in training. However, we experimentally reveal that these methods still struggle to generalize their detection capabilities to unknown OOD data, due to the limited diversity of the auxiliary outliers collected. 
Therefore, we thoroughly examine this problem from the generalization perspective and demonstrate that a more diverse set of auxiliary outliers is essential for enhancing the detection capabilities. However, in practice, it is difficult and costly to collect sufficiently diverse auxiliary outlier data. Therefore, we propose a simple yet practical approach with a theoretical guarantee, termed Diversity-induced Mixup for OOD detection (diverseMix), which enhances the diversity of auxiliary outlier set for training in an efficient way. Extensive experiments show that diverseMix achieves superior performance on commonly used and recent challenging large-scale benchmarks, which further confirm the importance of the diversity of auxiliary outliers. </p> </div> </dd> <dt> <a name='item213'>[213]</a> <a href ="/abs/2411.14052" title="Abstract" id="2411.14052"> arXiv:2411.14052 </a> [<a href="/pdf/2411.14052" title="Download PDF" id="pdf-2411.14052" aria-labelledby="pdf-2411.14052">pdf</a>, <a href="https://arxiv.org/html/2411.14052v1" title="View HTML" id="html-2411.14052" aria-labelledby="html-2411.14052" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14052" title="Other formats" id="oth-2411.14052" aria-labelledby="oth-2411.14052">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Dynamic Trajectory and Power Control in Ultra-Dense UAV Networks: A Mean-Field Reinforcement Learning Approach </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Song,+F">Fei Song</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+Z">Zhe Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+J">Jun Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Shi,+L">Long Shi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+W">Wen Chen</a>, <a 
href="https://arxiv.org/search/eess?searchtype=author&query=Jin,+S">Shi Jin</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> In ultra-dense unmanned aerial vehicle (UAV) networks, it is challenging to coordinate the resource allocation and interference management among large-scale UAVs, for providing flexible and efficient service coverage to the ground users (GUs). In this paper, we propose a learning-based resource allocation scheme in an ultra-dense UAV communication network, where the GUs' service demands are time-varying with unknown distributions. We formulate the non-cooperative game among multiple co-channel UAVs as a stochastic game, where each UAV jointly optimizes its trajectory, user association, and downlink power control to maximize the expectation of its locally cumulative energy efficiency under the interference and energy constraints. To cope with the scalability issue in a large-scale network, we further formulate the problem as a mean-field game (MFG), which simplifies the interactions among the UAVs into a two-player game between a representative UAV and a mean-field. We prove the existence and uniqueness of the equilibrium for the MFG, and propose a model-free mean-field reinforcement learning algorithm named maximum entropy mean-field deep Q network (ME-MFDQN) to solve the mean-field equilibrium in both fully and partially observable scenarios. The simulation results reveal that the proposed algorithm improves the energy efficiency compared with the benchmark algorithms. Moreover, the performance can be further enhanced if the GUs' service demands exhibit higher temporal correlation or if the UAVs have wider observation capabilities over their nearby GUs. 
</p> </div> </dd> <dt> <a name='item214'>[214]</a> <a href ="/abs/2411.14053" title="Abstract" id="2411.14053"> arXiv:2411.14053 </a> [<a href="/pdf/2411.14053" title="Download PDF" id="pdf-2411.14053" aria-labelledby="pdf-2411.14053">pdf</a>, <a href="https://arxiv.org/html/2411.14053v1" title="View HTML" id="html-2411.14053" aria-labelledby="html-2411.14053" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14053" title="Other formats" id="oth-2411.14053" aria-labelledby="oth-2411.14053">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Stereo Anything: Unifying Stereo Matching with Large-Scale Mixed Data </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+X">Xianda Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+C">Chenming Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Youmin Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nie,+D">Dujun Nie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+R">Ruilin Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+W">Wenzhao Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Poggi,+M">Matteo Poggi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+L">Long Chen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Code will be available at <a href="https://github.com/XiandaGuo/OpenStereo" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Stereo matching has been a pivotal component in 3D vision, aiming to find corresponding points between pairs of stereo images to 
recover depth information. In this work, we introduce StereoAnything, a highly practical solution for robust stereo matching. Rather than focusing on a specialized model, our goal is to develop a versatile foundational model capable of handling stereo images across diverse environments. To this end, we scale up the dataset by collecting labeled stereo images and generating synthetic stereo pairs from unlabeled monocular images. To further enrich the model's ability to generalize across different conditions, we introduce a novel synthetic dataset that complements existing data by adding variability in baselines, camera angles, and scene types. We extensively evaluate the zero-shot capabilities of our model on five public datasets, showcasing its impressive ability to generalize to new, unseen data. Code will be available at <a href="https://github.com/XiandaGuo/OpenStereo" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item215'>[215]</a> <a href ="/abs/2411.14054" title="Abstract" id="2411.14054"> arXiv:2411.14054 </a> [<a href="/pdf/2411.14054" title="Download PDF" id="pdf-2411.14054" aria-labelledby="pdf-2411.14054">pdf</a>, <a href="https://arxiv.org/html/2411.14054v1" title="View HTML" id="html-2411.14054" aria-labelledby="html-2411.14054" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14054" title="Other formats" id="oth-2411.14054" aria-labelledby="oth-2411.14054">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FunctionChat-Bench: Comprehensive Evaluation of Language Models' Generative Capabilities in Korean Tool-use Dialogs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+S">Shinbok Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Seo,+G">Gaeun Seo</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+D">Daniel Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ko,+B">Byeongil Ko</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jung,+S">Sunghee Jung</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shin,+M">Myeongcheol Shin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> This study investigates language models' generative capabilities in tool-use dialogs. We categorize the models' outputs in tool-use dialogs into four distinct types: Tool Call, Answer Completion, Slot Question, and Relevance Detection, which serve as aspects for evaluation. We introduce FunctionChat-Bench, comprising 700 evaluation items and automated assessment programs. Using this benchmark, we evaluate several language models that support function calling. Our findings indicate that while language models may exhibit high accuracy in single-turn Tool Call scenarios, this does not necessarily translate to superior generative performance in multi-turn environments. We argue that the capabilities required for function calling extend beyond generating tool call messages; they must also effectively generate conversational messages that engage the user. 
</p> </div> </dd> <dt> <a name='item216'>[216]</a> <a href ="/abs/2411.14055" title="Abstract" id="2411.14055"> arXiv:2411.14055 </a> [<a href="/pdf/2411.14055" title="Download PDF" id="pdf-2411.14055" aria-labelledby="pdf-2411.14055">pdf</a>, <a href="https://arxiv.org/html/2411.14055v1" title="View HTML" id="html-2411.14055" aria-labelledby="html-2411.14055" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14055" title="Other formats" id="oth-2411.14055" aria-labelledby="oth-2411.14055">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DRPruning: Efficient Large Language Model Pruning through Distributionally Robust Optimization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+H">Hexuan Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiao,+W">Wenxiang Jiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+X">Xuebo Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+M">Min Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tu,+Z">Zhaopeng Tu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Work in Progress </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> Large language models (LLMs) deliver impressive results but face challenges from increasing model sizes and computational costs. Structured pruning reduces model size and speeds up inference but often causes uneven degradation across domains, leading to biased performance. To address this, we propose DRPruning, which incorporates distributionally robust optimization to restore balanced performance across domains, along with further improvements to enhance robustness. 
Experiments in monolingual and multilingual settings show that our method surpasses similarly sized models in pruning and continued pretraining over perplexity, downstream tasks, and instruction tuning. We further provide analysis demonstrating the robustness of our method towards various domains and distribution shifts. Furthermore, our method automatically determines optimal reference losses and data ratios, suggesting potential for broader applications. Our code is available at <a href="https://github.com/hexuandeng/DRPruning" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item217'>[217]</a> <a href ="/abs/2411.14062" title="Abstract" id="2411.14062"> arXiv:2411.14062 </a> [<a href="/pdf/2411.14062" title="Download PDF" id="pdf-2411.14062" aria-labelledby="pdf-2411.14062">pdf</a>, <a href="https://arxiv.org/html/2411.14062v1" title="View HTML" id="html-2411.14062" aria-labelledby="html-2411.14062" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14062" title="Other formats" id="oth-2411.14062" aria-labelledby="oth-2411.14062">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MMGenBench: Evaluating the Limits of LMMs from the Text-to-Image Generation Perspective </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+H">Hailang Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yong Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+Z">Zixuan Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+H">Huaqiu Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+T">Tongwen Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chu,+X">Xiangxiang Chu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+R">Richong Zhang</a></div> <div 
class='list-comments mathjax'><span class='descriptor'>Comments:</span> This project is available at: <a href="https://github.com/lerogo/MMGenBench" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Machine Learning (cs.LG) </div> <p class='mathjax'> Large Multimodal Models (LMMs) have demonstrated remarkable capabilities. While existing benchmarks for evaluating LMMs mainly focus on image comprehension, few works evaluate them from the image generation perspective. To address this issue, we propose a straightforward automated evaluation pipeline. Specifically, this pipeline requires LMMs to generate an image-prompt from a given input image. Subsequently, it employs text-to-image generative models to create a new image based on these generated prompts. Finally, we evaluate the performance of LMMs by comparing the original image with the generated one. Furthermore, we introduce MMGenBench-Test, a comprehensive benchmark developed to evaluate LMMs across 13 distinct image patterns, and MMGenBench-Domain, targeting the performance evaluation of LMMs within the generative image domain. A thorough evaluation involving over 50 popular LMMs demonstrates the effectiveness and reliability in both the pipeline and benchmark. Our observations indicate that numerous LMMs excelling in existing benchmarks fail to adequately complete the basic tasks, related to image understanding and description. This finding highlights the substantial potential for performance improvement in current LMMs and suggests avenues for future model optimization. Concurrently, our pipeline facilitates the efficient assessment of LMMs performance across diverse domains by using solely image inputs. 
</p> </div> </dd> <dt> <a name='item218'>[218]</a> <a href ="/abs/2411.14064" title="Abstract" id="2411.14064"> arXiv:2411.14064 </a> [<a href="/pdf/2411.14064" title="Download PDF" id="pdf-2411.14064" aria-labelledby="pdf-2411.14064">pdf</a>, <a href="/format/2411.14064" title="Other formats" id="oth-2411.14064" aria-labelledby="oth-2411.14064">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi LoRA Meets Vision: Merging multiple adapters to create a multi task model </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kesim,+E">Ege Kesim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Helli,+S+S">Selahattin Serdar Helli</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Parameter efficient finetuning (PEFT) methods are widely used in LLMs and generative models in computer vision. Especially one can use multiple of these during inference to change the behavior of the base model. In this paper we investigated whether multiple LoRA adapters trained on computer vision tasks can be merged together and used during inference without loss in performance. By achieving this, multitask models can be created just by merging different LoRAs. Merging these will reduce inference time and it will not require any additional retraining. We have trained adapters on six different tasks and evaluated their performance when they are merged together. For comparison we used a model with a frozen backbone and finetuned its head. Our results show that even with simple merging techniques creating a multitask model by merging adapters is achievable by slightly losing performance in some cases. In our experiments we merged up to three adapters together. 
Depending on the task and the similarity of the data adapters were trained on, merges can outperform head finetuning. We have observed that LoRAs trained with dissimilar datasets tend to perform better compared to model trained on similar datasets. </p> </div> </dd> <dt> <a name='item219'>[219]</a> <a href ="/abs/2411.14067" title="Abstract" id="2411.14067"> arXiv:2411.14067 </a> [<a href="/pdf/2411.14067" title="Download PDF" id="pdf-2411.14067" aria-labelledby="pdf-2411.14067">pdf</a>, <a href="https://arxiv.org/html/2411.14067v1" title="View HTML" id="html-2411.14067" aria-labelledby="html-2411.14067" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14067" title="Other formats" id="oth-2411.14067" aria-labelledby="oth-2411.14067">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Quadratic Lower Bound for Simulation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Groote,+J+F">Jan Friso Groote</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Martens,+J">Jan Martens</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Logic in Computer Science (cs.LO)</span> </div> <p class='mathjax'> We show that deciding simulation equivalence and simulation preorder have quadratic lower bounds assuming that the Strong Exponential Time Hypothesis holds. This is in line with the best known quadratic upper bounds of simulation equivalence. This means that deciding simulation is inherently quadratic. A typical consequence of this result is that computing simulation equivalence is fundamentally harder than bisimilarity. 
</p> </div> </dd> <dt> <a name='item220'>[220]</a> <a href ="/abs/2411.14070" title="Abstract" id="2411.14070"> arXiv:2411.14070 </a> [<a href="/pdf/2411.14070" title="Download PDF" id="pdf-2411.14070" aria-labelledby="pdf-2411.14070">pdf</a>, <a href="https://arxiv.org/html/2411.14070v1" title="View HTML" id="html-2411.14070" aria-labelledby="html-2411.14070" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14070" title="Other formats" id="oth-2411.14070" aria-labelledby="oth-2411.14070">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Adaptive Asynchronous Federated Learning for Human Activity Recognition </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gajanin,+R">Rastko Gajanin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Danilenka,+A">Anastasiya Danilenka</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Morichetta,+A">Andrea Morichetta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nastic,+S">Stefan Nastic</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Distributed, Parallel, and Cluster Computing (cs.DC)</span> </div> <p class='mathjax'> In this work, we tackle the problem of performing multi-label classification in the case of extremely heterogeneous data and with decentralized Machine Learning. Solving this issue is very important in IoT scenarios, where data coming from various sources, collected by heterogeneous devices, serve the learning of a distributed ML model through Federated Learning (FL). Specifically, we focus on the combination of FL applied to Human Activity Recognition (HAR), where the task is to detect which kind of movements or actions individuals perform. 
In this case, transitioning from centralized learning (CL) to federated learning is non-trivial as HAR displays heterogeneity in action and devices, leading to significant skews in label and feature distributions. We address this scenario by presenting concrete solutions and tools for transitioning from centralized to FL for non-IID scenarios, outlining the main design decisions that need to be taken. Leveraging an open-sourced HAR dataset, we experimentally evaluate the effects that data augmentation, scaling, optimizer, learning rate, and batch size choices have on the performance of resulting machine learning models. Some of our main findings include using SGD-m as an optimizer, global feature scaling across clients, and persistent feature skew in the presence of heterogeneous HAR data. Finally, we provide an open-source extension of the Flower framework that enables asynchronous FL. </p> </div> </dd> <dt> <a name='item221'>[221]</a> <a href ="/abs/2411.14072" title="Abstract" id="2411.14072"> arXiv:2411.14072 </a> [<a href="/pdf/2411.14072" title="Download PDF" id="pdf-2411.14072" aria-labelledby="pdf-2411.14072">pdf</a>, <a href="https://arxiv.org/html/2411.14072v1" title="View HTML" id="html-2411.14072" aria-labelledby="html-2411.14072" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14072" title="Other formats" id="oth-2411.14072" aria-labelledby="oth-2411.14072">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The Master-Slave Encoder Model for Improving Patent Text Summarization: A New Approach to Combining Specifications and Claims </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+S">Shu Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+X">Xin Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+Z">Zhengda Zhou</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Yi,+H">Haohan Yi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+X">Xuhui Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wan,+H">Hao Wan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 25 pages, 1 figure </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Programming Languages (cs.PL) </div> <p class='mathjax'> In order to solve the problem of insufficient generation quality caused by traditional patent text abstract generation models only originating from patent specifications, the problem of new terminology OOV caused by rapid patent updates, and the problem of information redundancy caused by insufficient consideration of the high professionalism, accuracy, and uniqueness of patent texts, we propose a patent text abstract generation model (MSEA) based on a master-slave encoder architecture; Firstly, the MSEA model designs a master-slave encoder, which combines the instructions in the patent text with the claims as input, and fully explores the characteristics and details between the two through the master-slave encoder; Then, the model enhances the consideration of new technical terms in the input sequence based on the pointer network, and further enhances the correlation with the input text by re weighing the "remembered" and "forgotten" parts of the input sequence from the encoder; Finally, an enhanced repetition suppression mechanism for patent text was introduced to ensure accurate and non redundant abstracts generated. On a publicly available patent text dataset, compared to the state-of-the-art model, Improved Multi-Head Attention Mechanism (IMHAM), the MSEA model achieves an improvement of 0.006, 0.005, and 0.005 in Rouge-1, Rouge-2, and Rouge-L scores, respectively. 
MSEA leverages the characteristics of patent texts to effectively enhance the quality of patent text generation, demonstrating its advancement and effectiveness in the experiments. </p> </div> </dd> <dt> <a name='item222'>[222]</a> <a href ="/abs/2411.14073" title="Abstract" id="2411.14073"> arXiv:2411.14073 </a> [<a href="/pdf/2411.14073" title="Download PDF" id="pdf-2411.14073" aria-labelledby="pdf-2411.14073">pdf</a>, <a href="https://arxiv.org/html/2411.14073v1" title="View HTML" id="html-2411.14073" aria-labelledby="html-2411.14073" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14073" title="Other formats" id="oth-2411.14073" aria-labelledby="oth-2411.14073">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Meaning at the Planck scale? Contextualized word embeddings for doing history, philosophy, and sociology of science </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Simons,+A">Arno Simons</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages, 7 figures (1 in the Supplement) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; History and Philosophy of Physics (physics.hist-ph) </div> <p class='mathjax'> This paper explores the potential of contextualized word embeddings (CWEs) as a new tool in the history, philosophy, and sociology of science (HPSS) for studying contextual and evolving meanings of scientific concepts. Using the term "Planck" as a test case, I evaluate five BERT-based models with varying degrees of domain-specific pretraining, including my custom model Astro-HEP-BERT, trained on the Astro-HEP Corpus, a dataset containing 21.84 million paragraphs from 600,000 articles in astrophysics and high-energy physics. 
For this analysis, I compiled two labeled datasets: (1) the Astro-HEP-Planck Corpus, consisting of 2,900 labeled occurrences of "Planck" sampled from 1,500 paragraphs in the Astro-HEP Corpus, and (2) a physics-related Wikipedia dataset comprising 1,186 labeled occurrences of "Planck" across 885 paragraphs. Results demonstrate that the domain-adapted models outperform the general-purpose ones in disambiguating the target term, predicting its known meanings, and generating high-quality sense clusters, as measured by a novel purity indicator I developed. Additionally, this approach reveals semantic shifts in the target term over three decades in the unlabeled Astro-HEP Corpus, highlighting the emergence of the Planck space mission as a dominant sense. The study underscores the importance of domain-specific pretraining for analyzing scientific language and demonstrates the cost-effectiveness of adapting pretrained models for HPSS research. By offering a scalable and transferable method for modeling the meanings of scientific concepts, CWEs open up new avenues for investigating the socio-historical dynamics of scientific discourses. 
</p> </div> </dd> <dt> <a name='item223'>[223]</a> <a href ="/abs/2411.14077" title="Abstract" id="2411.14077"> arXiv:2411.14077 </a> [<a href="/pdf/2411.14077" title="Download PDF" id="pdf-2411.14077" aria-labelledby="pdf-2411.14077">pdf</a>, <a href="https://arxiv.org/html/2411.14077v1" title="View HTML" id="html-2411.14077" aria-labelledby="html-2411.14077" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14077" title="Other formats" id="oth-2411.14077" aria-labelledby="oth-2411.14077">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On PI-control in Capacity-Limited Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Agner,+F">Felix Agner</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Rantzer,+A">Anders Rantzer</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> This paper concerns control of a class of systems where multiple dynamically stable agents share a nonlinear and bounded control-interconnection. The agents are subject to a disturbance which is too large to reject with the available control action, making it impossible to stabilize all agents in their desired states. In this nonlinear setting, we consider two different anti-windup equipped proportional-integral control strategies and analyze their properties. We show that a fully decentralized strategy will globally, asymptotically stabilize a unique equilibrium. This equilibrium also minimizes a weighted sum of the tracking errors. We also consider a light addition to the fully decentralized strategy, where rank-1 coordination between the agents is introduced via the anti-windup action. We show that any equilibrium to this closed-loop system minimizes the maximum tracking error for any agent. 
A remarkable property of these results is that they rely on extremely few assumptions on the interconnection between the agents. Finally we illustrate how the considered model can be applied in a district heating setting, and demonstrate the two considered controllers in a simulation. </p> </div> </dd> <dt> <a name='item224'>[224]</a> <a href ="/abs/2411.14082" title="Abstract" id="2411.14082"> arXiv:2411.14082 </a> [<a href="/pdf/2411.14082" title="Download PDF" id="pdf-2411.14082" aria-labelledby="pdf-2411.14082">pdf</a>, <a href="https://arxiv.org/html/2411.14082v1" title="View HTML" id="html-2411.14082" aria-labelledby="html-2411.14082" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14082" title="Other formats" id="oth-2411.14082" aria-labelledby="oth-2411.14082">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CKTSO: High-Performance Parallel Sparse Linear Solver for General Circuit Simulations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+X">Xiaoming Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Hardware Architecture (cs.AR)</span> </div> <p class='mathjax'> This paper introduces CKTSO (abbreviation of "circuit solver"), a novel sparse linear solver specially designed for the simulation program with integrated circuit emphasis (SPICE). CKTSO is a parallel solver and can be run on a multi-core, shared-memory computer. The algorithms of CKTSO are designed by considering the features of matrices involved in SPICE simulations. CKTSO is superior to existing similar solvers mainly in the following three aspects. First, the matrix ordering step of CKTSO combines different types of ordering algorithms such that it can generally obtain the fewest fill-ins for a wide range of circuit matrices. 
Second, CKTSO provides a parallel fast LU factorization algorithm with pivot check, which exhibits good performance, scalability, and numerical stability. Third, CKTSO provides a structure-adaptive hybrid parallel triangular solving algorithm, which can adapt to various circuit matrices. Experiments including both benchmark tests and SPICE simulations demonstrate the superior performance of CKTSO. The libraries of CKTSO are available at <a href="https://github.com/chenxm1986/cktso" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item225'>[225]</a> <a href ="/abs/2411.14084" title="Abstract" id="2411.14084"> arXiv:2411.14084 </a> [<a href="/pdf/2411.14084" title="Download PDF" id="pdf-2411.14084" aria-labelledby="pdf-2411.14084">pdf</a>, <a href="https://arxiv.org/html/2411.14084v1" title="View HTML" id="html-2411.14084" aria-labelledby="html-2411.14084" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14084" title="Other formats" id="oth-2411.14084" aria-labelledby="oth-2411.14084">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Neural numerical homogenization based on Deep Ritz corrections </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Elasmi,+M">Mehdi Elasmi</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Krumbiegel,+F">Felix Krumbiegel</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Maier,+R">Roland Maier</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span> </div> <p class='mathjax'> Numerical homogenization methods aim at providing appropriate coarse-scale approximations of solutions to (elliptic) partial differential equations that involve highly oscillatory coefficients. 
The localized orthogonal decomposition (LOD) method is an effective way of dealing with such coefficients, especially if they are non-periodic and non-smooth. It modifies classical finite element basis functions by suitable fine-scale corrections. In this paper, we make use of the structure of the LOD method, but we propose to calculate the corrections based on a Deep Ritz approach involving a parametrization of the coefficients to tackle temporal variations or uncertainties. Numerical examples for a parabolic model problem are presented to assess the performance of the approach. </p> </div> </dd> <dt> <a name='item226'>[226]</a> <a href ="/abs/2411.14085" title="Abstract" id="2411.14085"> arXiv:2411.14085 </a> [<a href="/pdf/2411.14085" title="Download PDF" id="pdf-2411.14085" aria-labelledby="pdf-2411.14085">pdf</a>, <a href="https://arxiv.org/html/2411.14085v1" title="View HTML" id="html-2411.14085" aria-labelledby="html-2411.14085" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14085" title="Other formats" id="oth-2411.14085" aria-labelledby="oth-2411.14085">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Exploration by Running Away from the Past </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tolguenec,+P+L">Paul-Antoine Le Tolguenec</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Besse,+Y">Yann Besse</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Teichteil-Koenigsbuch,+F">Florent Teichteil-Koenigsbuch</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wilson,+D+G">Dennis G. 
Wilson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rachelson,+E">Emmanuel Rachelson</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> The ability to explore efficiently and effectively is a central challenge of reinforcement learning. In this work, we consider exploration through the lens of information theory. Specifically, we cast exploration as a problem of maximizing the Shannon entropy of the state occupation measure. This is done by maximizing a sequence of divergences between distributions representing an agent's past behavior and its current behavior. Intuitively, this encourages the agent to explore new behaviors that are distinct from past behaviors. Hence, we call our method RAMP, for “$\textbf{R}$unning $\textbf{A}$way fro$\textbf{m}$ the $\textbf{P}$ast.” A fundamental question of this method is the quantification of the distribution change over time. We consider both the Kullback-Leibler divergence and the Wasserstein distance to quantify divergence between successive state occupation measures, and explain why the former might lead to undesirable exploratory behaviors in some tasks. We demonstrate that by encouraging the agent to explore by actively distancing itself from past experiences, it can effectively explore mazes and a wide range of behaviors on robotic manipulation and locomotion tasks. 
</p> </div> </dd> <dt> <a name='item227'>[227]</a> <a href ="/abs/2411.14086" title="Abstract" id="2411.14086"> arXiv:2411.14086 </a> [<a href="/pdf/2411.14086" title="Download PDF" id="pdf-2411.14086" aria-labelledby="pdf-2411.14086">pdf</a>, <a href="https://arxiv.org/html/2411.14086v1" title="View HTML" id="html-2411.14086" aria-labelledby="html-2411.14086" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14086" title="Other formats" id="oth-2411.14086" aria-labelledby="oth-2411.14086">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Path Tracking Hybrid A* For Autonomous Agricultural Vehicles </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+M">Mingke Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+H">Han Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dai,+H">Haijie Dai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lei,+Q">Qianli Lei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+C">Chang Liu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> We propose a path-tracking Hybrid A* planner and a coupled hierarchical Model Predictive Control (MPC) controller in scenarios involving the path smoothing of agricultural vehicles. For agricultural vehicles following reference paths on farmlands, especially during cross-furrow operations, a minimum deviation from the reference path is desired, in addition to the curvature constraints and body scale collision avoidance. Our contribution is threefold. (1) We propose the path-tracking Hybrid A*, which satisfies nonholonomic constraints and vehicle size collision avoidance, and devise new cost and heuristic functions to minimize the deviation degree. 
The path-tracking Hybrid A* can function not only in offline smoothing but also in real-time adjustment when confronted with unexpected obstacles. (2) We propose the hierarchical MPC to safely track the smoothed trajectory, using the initial solution solved by linearized MPC and nonlinear local adjustments around the initial solution. (3) We carry out extensive simulations with baseline comparisons based on real-world farm datasets to evaluate the performance of our algorithm. </p> </div> </dd> <dt> <a name='item228'>[228]</a> <a href ="/abs/2411.14087" title="Abstract" id="2411.14087"> arXiv:2411.14087 </a> [<a href="/pdf/2411.14087" title="Download PDF" id="pdf-2411.14087" aria-labelledby="pdf-2411.14087">pdf</a>, <a href="https://arxiv.org/html/2411.14087v1" title="View HTML" id="html-2411.14087" aria-labelledby="html-2411.14087" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14087" title="Other formats" id="oth-2411.14087" aria-labelledby="oth-2411.14087">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Determining the covering radius of all generalized Zetterberg codes in odd characteristic </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+M">Minjia Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+S">Shitao Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Helleseth,+T">Tor Helleseth</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ozbudak,+F">Ferruh Ozbudak</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span> </div> <p class='mathjax'> For an integer $s\ge 1$, let $\mathcal{C}_s(q_0)$ be the generalized Zetterberg code of length $q_0^s+1$ over the finite field $\mathbb{F}_{q_0}$ of odd characteristic. Recently, Shi, Helleseth, and Özbudak (IEEE Trans. Inf. 
Theory 69(11): 7025-7048, 2023) determined the covering radius of $\mathcal{C}_s(q_0)$ for $q_0^s \not \equiv 7 \pmod{8}$, and left the remaining case as an open problem. In this paper, we develop a general technique involving arithmetic of finite fields and algebraic curves over finite fields to determine the covering radius of all generalized Zetterberg codes for $q_0^s \equiv 7 \pmod{8}$, which therefore solves this open problem. We also introduce the concept of twisted half generalized Zetterberg codes of length $\frac{q_0^s+1}{2}$, and show the same results hold for them. As a result, we obtain some quasi-perfect codes. </p> </div> </dd> <dt> <a name='item229'>[229]</a> <a href ="/abs/2411.14088" title="Abstract" id="2411.14088"> arXiv:2411.14088 </a> [<a href="/pdf/2411.14088" title="Download PDF" id="pdf-2411.14088" aria-labelledby="pdf-2411.14088">pdf</a>, <a href="https://arxiv.org/html/2411.14088v1" title="View HTML" id="html-2411.14088" aria-labelledby="html-2411.14088" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14088" title="Other formats" id="oth-2411.14088" aria-labelledby="oth-2411.14088">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Channel Customization for Low-Complexity CSI Acquisition in Multi-RIS-Assisted MIMO Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+W">Weicong Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Han,+Y">Yu Han</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+C">Chao-Kai Wen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xiao Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+S">Shi Jin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by IEEE JSAC special issue on Next Generation Advanced Transceiver Technologies </div> <div 
class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> The deployment of multiple reconfigurable intelligent surfaces (RISs) enhances the propagation environment by improving channel quality, but it also complicates channel estimation. Following the conventional wireless communication system design, which involves full channel state information (CSI) acquisition followed by RIS configuration, can reduce transmission efficiency due to substantial pilot overhead and computational complexity. This study introduces an innovative approach that integrates CSI acquisition and RIS configuration, leveraging the channel-altering capabilities of the RIS to reduce both the overhead and complexity of CSI acquisition. The focus is on multi-RIS-assisted systems, featuring both direct and reflected propagation paths. By applying a fast-varying reflection sequence during RIS configuration for channel training, the complex problem of channel estimation is decomposed into simpler, independent tasks. These fast-varying reflections effectively isolate transmit signals from different paths, streamlining the CSI acquisition process for both uplink and downlink communications with reduced complexity. In uplink scenarios, a positioning-based algorithm derives partial CSI, informing the adjustment of RIS parameters to create a sparse reflection channel, enabling precise reconstruction of the uplink channel. Downlink communication benefits from this strategically tailored reflection channel, allowing effective CSI acquisition with fewer pilot signals. Simulation results highlight the proposed methodology's ability to accurately reconstruct the reflection channel with minimal impact on the normalized mean square error while simultaneously enhancing spectral efficiency. 
</p> </div> </dd> <dt> <a name='item230'>[230]</a> <a href ="/abs/2411.14092" title="Abstract" id="2411.14092"> arXiv:2411.14092 </a> [<a href="/pdf/2411.14092" title="Download PDF" id="pdf-2411.14092" aria-labelledby="pdf-2411.14092">pdf</a>, <a href="https://arxiv.org/html/2411.14092v1" title="View HTML" id="html-2411.14092" aria-labelledby="html-2411.14092" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14092" title="Other formats" id="oth-2411.14092" aria-labelledby="oth-2411.14092">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MetaCropFollow: Few-Shot Adaptation with Meta-Learning for Under-Canopy Navigation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Woehrle,+T">Thomas Woehrle</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sivakumar,+A+N">Arun N. Sivakumar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Uppalapati,+N">Naveen Uppalapati</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chowdhary,+G">Girish Chowdhary</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Autonomous under-canopy navigation faces additional challenges compared to over-canopy settings - for example the tight spacing between the crop rows, degraded GPS accuracy and excessive clutter. Keypoint-based visual navigation has been shown to perform well in these conditions, however the differences between agricultural environments in terms of lighting, season, soil and crop type mean that a domain shift will likely be encountered at some point of the robot deployment. In this paper, we explore the use of Meta-Learning to overcome this domain shift using a minimal amount of data. 
We train a base-learner that can quickly adapt to new conditions, enabling more robust navigation in low-data regimes. </p> </div> </dd> <dt> <a name='item231'>[231]</a> <a href ="/abs/2411.14094" title="Abstract" id="2411.14094"> arXiv:2411.14094 </a> [<a href="/pdf/2411.14094" title="Download PDF" id="pdf-2411.14094" aria-labelledby="pdf-2411.14094">pdf</a>, <a href="https://arxiv.org/html/2411.14094v1" title="View HTML" id="html-2411.14094" aria-labelledby="html-2411.14094" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14094" title="Other formats" id="oth-2411.14094" aria-labelledby="oth-2411.14094">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> GNN-MultiFix: Addressing the pitfalls for GNNs for multi-label node classification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+T">Tianqi Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Khosla,+M">Megha Khosla</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Graph neural networks (GNNs) have emerged as powerful models for learning representations of graph data showing state of the art results in various tasks. Nevertheless, the superiority of these methods is usually supported by either evaluating their performance on small subset of benchmark datasets or by reasoning about their expressive power in terms of certain graph isomorphism tests. In this paper we critically analyse both these aspects through a transductive setting for the task of node classification. First, we delve deeper into the case of multi-label node classification which offers a more realistic scenario and has been ignored in most of the related works. 
Through analysing the training dynamics for GNN methods we highlight the failure of GNNs to learn over multi-label graph datasets even for the case of abundant training data. Second, we show that specifically for transductive node classification, even the most expressive GNN may fail to learn in absence of node attributes and without using explicit label information as input. To overcome this deficit, we propose a straightforward approach, referred to as GNN-MultiFix, that integrates the feature, label, and positional information of a node. GNN-MultiFix demonstrates significant improvement across all the multi-label datasets. We release our code at <a href="https://anonymous.4open.science/r/Graph-MultiFix-4121" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item232'>[232]</a> <a href ="/abs/2411.14095" title="Abstract" id="2411.14095"> arXiv:2411.14095 </a> [<a href="/pdf/2411.14095" title="Download PDF" id="pdf-2411.14095" aria-labelledby="pdf-2411.14095">pdf</a>, <a href="https://arxiv.org/html/2411.14095v1" title="View HTML" id="html-2411.14095" aria-labelledby="html-2411.14095" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14095" title="Other formats" id="oth-2411.14095" aria-labelledby="oth-2411.14095">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> WARLearn: Weather-Adaptive Representation Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Agarwal,+S">Shubham Agarwal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Birman,+R">Raz Birman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hadar,+O">Ofer Hadar</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted for publication in IEEE/CVF Winter Conference on Applications of Computer Vision (WACV), 2025 </div> <div 
class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> This paper introduces WARLearn, a novel framework designed for adaptive representation learning in challenging and adversarial weather conditions. Leveraging the invariance principle used in Barlow Twins, we demonstrate the capability to port the existing models initially trained on clear weather data to effectively handle adverse weather conditions. With minimal additional training, our method exhibits remarkable performance gains in scenarios characterized by fog and low-light conditions. This adaptive framework extends its applicability beyond adverse weather settings, offering a versatile solution for domains exhibiting variations in data distributions. Furthermore, WARLearn is invaluable in scenarios where data distributions undergo significant shifts over time, enabling models to remain updated and accurate. Our experimental findings reveal a remarkable performance, with a mean average precision (mAP) of 52.6% on unseen real-world foggy dataset (RTTS). Similarly, in low light conditions, our framework achieves a mAP of 55.7% on unseen real-world low light dataset (ExDark). Notably, WARLearn surpasses the performance of state-of-the-art frameworks including FeatEnHancer, Image Adaptive YOLO, DENet, C2PNet, PairLIE and ZeroDCE, by a substantial margin in adverse weather, improving the baseline performance in both foggy and low light conditions. 
The WARLearn code is available at <a href="https://github.com/ShubhamAgarwal12/WARLearn" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item233'>[233]</a> <a href ="/abs/2411.14103" title="Abstract" id="2411.14103"> arXiv:2411.14103 </a> [<a href="/pdf/2411.14103" title="Download PDF" id="pdf-2411.14103" aria-labelledby="pdf-2411.14103">pdf</a>, <a href="https://arxiv.org/html/2411.14103v1" title="View HTML" id="html-2411.14103" aria-labelledby="html-2411.14103" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14103" title="Other formats" id="oth-2411.14103" aria-labelledby="oth-2411.14103">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Lost in Inference: Rediscovering the Role of Natural Language Inference for Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Madaan,+L">Lovish Madaan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Esiobu,+D">David Esiobu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stenetorp,+P">Pontus Stenetorp</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Plank,+B">Barbara Plank</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hupkes,+D">Dieuwke Hupkes</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> preprint, 13 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> In the recent past, a popular way of evaluating natural language understanding (NLU), was to consider a model's ability to perform natural language inference (NLI) tasks. In this paper, we investigate if NLI tasks, that are rarely used for LLM evaluation, can still be informative for evaluating LLMs. 
Focusing on five different NLI benchmarks across six models of different scales, we investigate if they are able to discriminate models of different size and quality and how their accuracies develop during training. Furthermore, we investigate the extent to which the softmax distributions of models align with human distributions in cases where statements are ambiguous or vague. Overall, our results paint a positive picture for the NLI tasks: we find that they are able to discriminate well between models at various stages of training, yet are not (all) saturated. Furthermore, we find that while the similarity of model distributions with human label distributions increases with scale, it is still much higher than the similarity between two populations of humans, making it a potentially interesting statistic to consider. </p> </div> </dd> <dt> <a name='item234'>[234]</a> <a href ="/abs/2411.14110" title="Abstract" id="2411.14110"> arXiv:2411.14110 </a> [<a href="/pdf/2411.14110" title="Download PDF" id="pdf-2411.14110" aria-labelledby="pdf-2411.14110">pdf</a>, <a href="https://arxiv.org/html/2411.14110v1" title="View HTML" id="html-2411.14110" aria-labelledby="html-2411.14110" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14110" title="Other formats" id="oth-2411.14110" aria-labelledby="oth-2411.14110">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> RAG-Thief: Scalable Extraction of Private Data from Retrieval-Augmented Generation Applications with Agent-based Attacks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+C">Changyue Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+X">Xudong Pan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hong,+G">Geng Hong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bao,+C">Chenfu Bao</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+M">Min Yang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span> </div> <p class='mathjax'> While large language models (LLMs) have achieved notable success in generative tasks, they still face limitations, such as lacking up-to-date knowledge and producing hallucinations. Retrieval-Augmented Generation (RAG) enhances LLM performance by integrating external knowledge bases, providing additional context which significantly improves accuracy and knowledge coverage. However, building these external knowledge bases often requires substantial resources and may involve sensitive information. In this paper, we propose an agent-based automated privacy attack called RAG-Thief, which can extract a scalable amount of private data from the private database used in RAG applications. We conduct a systematic study on the privacy risks associated with RAG applications, revealing that the vulnerability of LLMs makes the private knowledge bases suffer significant privacy risks. Unlike previous manual attacks which rely on traditional prompt injection techniques, RAG-Thief starts with an initial adversarial query and learns from model responses, progressively generating new queries to extract as many chunks from the knowledge base as possible. Experimental results show that our RAG-Thief can extract over 70% information from the private knowledge bases within customized RAG applications deployed on local machines and real-world platforms, including OpenAI's GPTs and ByteDance's Coze. Our findings highlight the privacy vulnerabilities in current RAG applications and underscore the pressing need for stronger safeguards. 
</p> </div> </dd> <dt> <a name='item235'>[235]</a> <a href ="/abs/2411.14117" title="Abstract" id="2411.14117"> arXiv:2411.14117 </a> [<a href="/pdf/2411.14117" title="Download PDF" id="pdf-2411.14117" aria-labelledby="pdf-2411.14117">pdf</a>, <a href="https://arxiv.org/html/2411.14117v1" title="View HTML" id="html-2411.14117" aria-labelledby="html-2411.14117" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14117" title="Other formats" id="oth-2411.14117" aria-labelledby="oth-2411.14117">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Umbrella Reinforcement Learning -- computationally efficient tool for hard non-linear problems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nuzhin,+E+E">Egor E. Nuzhin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Brilliantov,+N+V">Nikolai V. Brilliantov</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> We report a novel, computationally efficient approach for solving hard nonlinear problems of reinforcement learning (RL). Here we combine umbrella sampling, from computational physics/chemistry, with optimal control methods. The approach is realized on the basis of neural networks, with the use of policy gradient. It outperforms, by computational efficiency and implementation universality, all available state-of-the-art algorithms, in application to hard RL problems with sparse reward, state traps and lack of terminal states. The proposed approach uses an ensemble of simultaneously acting agents, with a modified reward which includes the ensemble entropy, yielding an optimal exploration-exploitation balance. 
</p> </div> </dd> <dt> <a name='item236'>[236]</a> <a href ="/abs/2411.14119" title="Abstract" id="2411.14119"> arXiv:2411.14119 </a> [<a href="/pdf/2411.14119" title="Download PDF" id="pdf-2411.14119" aria-labelledby="pdf-2411.14119">pdf</a>, <a href="https://arxiv.org/html/2411.14119v1" title="View HTML" id="html-2411.14119" aria-labelledby="html-2411.14119" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14119" title="Other formats" id="oth-2411.14119" aria-labelledby="oth-2411.14119">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Uncertainty-Aware Regression for Socio-Economic Estimation via Multi-View Remote Sensing </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+F">Fan Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ishida,+S">Sahoko Ishida</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+M">Mengyan Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jenson,+D">Daniel Jenson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mishra,+S">Swapnil Mishra</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Navott,+J">Jhonathan Navott</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Flaxman,+S">Seth Flaxman</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Remote sensing imagery offers rich spectral data across extensive areas for Earth observation. Many attempts have been made to leverage these data with transfer learning to develop scalable alternatives for estimating socio-economic conditions, reducing reliance on expensive survey-collected data. 
However, much of this research has primarily focused on daytime satellite imagery due to the limitation that most pre-trained models are trained on 3-band RGB images. Consequently, modeling techniques for spectral bands beyond the visible spectrum have not been thoroughly investigated. Additionally, quantifying uncertainty in remote sensing regression has been less explored, yet it is essential for more informed targeting and iterative collection of ground truth survey data. In this paper, we introduce a novel framework that leverages generic foundational vision models to process remote sensing imagery using combinations of three spectral bands to exploit multi-spectral data. We also employ methods such as heteroscedastic regression and Bayesian modeling to generate uncertainty estimates for the predictions. Experimental results demonstrate that our method outperforms existing models that use RGB or multi-spectral models with unstructured band usage. Moreover, our framework helps identify uncertain predictions, guiding future ground truth data acquisition. 
</p> </div> </dd> <dt> <a name='item237'>[237]</a> <a href ="/abs/2411.14120" title="Abstract" id="2411.14120"> arXiv:2411.14120 </a> [<a href="/pdf/2411.14120" title="Download PDF" id="pdf-2411.14120" aria-labelledby="pdf-2411.14120">pdf</a>, <a href="https://arxiv.org/html/2411.14120v1" title="View HTML" id="html-2411.14120" aria-labelledby="html-2411.14120" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14120" title="Other formats" id="oth-2411.14120" aria-labelledby="oth-2411.14120">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Point Cloud Resampling with Learnable Heat Diffusion </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+W">Wenqiang Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dai,+W">Wenrui Dai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xue,+D">Duoduo Xue</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+Z">Ziyang Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+C">Chenglin Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zou,+J">Junni Zou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiong,+H">Hongkai Xiong</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Generative diffusion models have shown empirical successes in point cloud resampling, generating a denser and more uniform distribution of points from sparse or noisy 3D point clouds by progressively refining noise into structure. However, existing diffusion models employ manually predefined schemes, which often fail to recover the underlying point cloud structure due to the rigid and disruptive nature of the geometric degradation. 
To address this issue, we propose a novel learnable heat diffusion framework for point cloud resampling, which directly parameterizes the marginal distribution for the forward process by learning the adaptive heat diffusion schedules and local filtering scales of the time-varying heat kernel, and consequently, generates an adaptive conditional prior for the reverse process. Unlike previous diffusion models with a fixed prior, the adaptive conditional prior selectively preserves geometric features of the point cloud by minimizing a refined variational lower bound, guiding the points to evolve towards the underlying surface during the reverse process. Extensive experimental results demonstrate that the proposed point cloud resampling achieves state-of-the-art performance in representative reconstruction tasks including point cloud denoising and upsampling. </p> </div> </dd> <dt> <a name='item238'>[238]</a> <a href ="/abs/2411.14121" title="Abstract" id="2411.14121"> arXiv:2411.14121 </a> [<a href="/pdf/2411.14121" title="Download PDF" id="pdf-2411.14121" aria-labelledby="pdf-2411.14121">pdf</a>, <a href="https://arxiv.org/html/2411.14121v1" title="View HTML" id="html-2411.14121" aria-labelledby="html-2411.14121" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14121" title="Other formats" id="oth-2411.14121" aria-labelledby="oth-2411.14121">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning from "Silly" Questions Improves Large Language Models, But Only Slightly </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+T">Tingyuan Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+S">Shudong Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yidong Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wong,+D+F">Derek F. 
Wong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+H">Han Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shinozaki,+T">Takahiro Shinozaki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+J">Jindong Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 27 pages, 14 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> Constructing high-quality Supervised Fine-Tuning (SFT) datasets is critical for the training of large language models (LLMs). Recent studies have shown that using data from a specific source, Ruozhiba, a Chinese website where users ask "silly" questions to better understand certain topics, can lead to better fine-tuning performance. This paper aims to explore some hidden factors: the potential interpretations of its success and a large-scale evaluation of the performance. First, we leverage GPT-4 to analyze the successful cases of Ruozhiba questions from the perspective of education, psychology, and cognitive science, deriving a set of explanatory rules. Then, we construct fine-tuning datasets by applying these rules to the MMLU training set. Surprisingly, our results indicate that rules can significantly improve model performance in certain tasks, while potentially diminishing performance on others. For example, SFT data generated following the "Counterintuitive Thinking" rule can achieve approximately a 5% improvement on the "Global Facts" task, whereas the "Blurring the Conceptual Boundaries" rule leads to a performance drop of 6.14% on the "Econometrics" task. In addition, for specific tasks, different rules tend to have a consistent impact on model performance. 
This suggests that the differences between the extracted rules are not as significant, and the effectiveness of the rules is relatively consistent across tasks. Our research highlights the importance of considering task diversity and rule applicability when constructing SFT datasets to achieve more comprehensive performance improvements. </p> </div> </dd> <dt> <a name='item239'>[239]</a> <a href ="/abs/2411.14123" title="Abstract" id="2411.14123"> arXiv:2411.14123 </a> [<a href="/pdf/2411.14123" title="Download PDF" id="pdf-2411.14123" aria-labelledby="pdf-2411.14123">pdf</a>, <a href="https://arxiv.org/html/2411.14123v1" title="View HTML" id="html-2411.14123" aria-labelledby="html-2411.14123" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14123" title="Other formats" id="oth-2411.14123" aria-labelledby="oth-2411.14123">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi-terminal Strong Coordination subject to Secrecy Constraints </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ramachandran,+V">Viswanathan Ramachandran</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Oechtering,+T+J">Tobias J. Oechtering</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Skoglund,+M">Mikael Skoglund</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Extended version of ISIT 2024 paper </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span> </div> <p class='mathjax'> A fundamental problem in decentralized networked systems is to coordinate actions of different agents so that they reach a state of agreement. In such applications, it is additionally desirable that the actions at various nodes may not be anticipated by malicious eavesdroppers. 
Motivated by this, we investigate the problem of secure multi-terminal strong coordination aided by a multiple-access wiretap channel. In this setup, independent and identically distributed copies of correlated sources are observed by two transmitters who encode the channel inputs to the MAC-WT. The legitimate receiver observing the channel output and side information correlated with the sources must produce approximately i.i.d. copies of an output variable jointly distributed with the sources. Furthermore, we demand that an external eavesdropper learns essentially nothing about the sources and the simulated output sequence by observing its own MAC-WT output. This setting is aided by the presence of independent pairwise shared randomness between each encoder and the legitimate decoder, that is unavailable to the eavesdropper. We derive an achievable rate region based on a combination of coordination coding and wiretap coding, along with an outer bound. The inner bound is shown to be tight and a complete characterization is derived for the special case when the sources are conditionally independent given the decoder side information and the legitimate channel is composed of deterministic links. Further, we also analyze a more general scenario with possible encoder cooperation, where one of the encoders can non-causally crib from the other encoder's input, for which an achievable rate region is proposed. We then explicitly compute the rate regions for an example both with and without cribbing between the encoders, and demonstrate that cribbing strictly improves upon the achievable rate region. 
</p> </div> </dd> <dt> <a name='item240'>[240]</a> <a href ="/abs/2411.14125" title="Abstract" id="2411.14125"> arXiv:2411.14125 </a> [<a href="/pdf/2411.14125" title="Download PDF" id="pdf-2411.14125" aria-labelledby="pdf-2411.14125">pdf</a>, <a href="https://arxiv.org/html/2411.14125v1" title="View HTML" id="html-2411.14125" aria-labelledby="html-2411.14125" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14125" title="Other formats" id="oth-2411.14125" aria-labelledby="oth-2411.14125">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> RestorerID: Towards Tuning-Free Face Restoration with ID Preservation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ying,+J">Jiacheng Ying</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+M">Mushui Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Z">Zhe Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+R">Runming Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+Z">Zhu Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+S">Siming Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+S">Si-Yuan Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+C">Chao Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+Y">Yunlong Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shen,+H">Hui-Liang Shen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages, 10 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Blind face restoration has made great progress in producing high-quality and lifelike images. 
Yet it remains challenging to preserve the ID information, especially when the degradation is heavy. Current reference-guided face restoration approaches either require face alignment or personalized test-tuning, which are unfaithful or time-consuming. In this paper, we propose a tuning-free method named RestorerID that incorporates ID preservation during face restoration. RestorerID is a diffusion model-based method that restores low-quality images with varying levels of degradation by using a single reference image. To achieve this, we propose a unified framework to combine the ID injection with the base blind face restoration model. In addition, we design a novel Face ID Rebalancing Adapter (FIR-Adapter) to tackle the problems of content inconsistency and contour misalignment that are caused by information conflicts between the low-quality input and reference image. Furthermore, by employing an Adaptive ID-Scale Adjusting strategy, RestorerID can produce superior restored images across various levels of degradation. Experimental results on the Celeb-Ref dataset and real-world scenarios demonstrate that RestorerID effectively delivers high-quality face restoration with ID preservation, achieving a superior performance compared to the test-tuning approaches and other reference-guided ones. The code of RestorerID is available at <a href="https://github.com/YingJiacheng/RestorerID" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item241'>[241]</a> <a href ="/abs/2411.14131" title="Abstract" id="2411.14131"> arXiv:2411.14131 </a> [<a href="/pdf/2411.14131" title="Download PDF" id="pdf-2411.14131" aria-labelledby="pdf-2411.14131">pdf</a>, <a href="https://arxiv.org/html/2411.14131v1" title="View HTML" id="html-2411.14131" aria-labelledby="html-2411.14131" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14131" title="Other formats" id="oth-2411.14131" aria-labelledby="oth-2411.14131">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> sEMG-based Gesture-Free Hand Intention Recognition: System, Dataset, Toolbox, and Benchmark Results </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+H">Hongxin Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+J">Jingsheng Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+X">Xuechao Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dai,+W">Wei Dai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yaru Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiao,+J">Junhao Xiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+H">Huimin Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+Z">Zongtan Zhou</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span> </div> <p class='mathjax'> In sensitive scenarios, such as meetings, negotiations, and team sports, messages must be conveyed without detection by non-collaborators. 
Previous methods, such as encrypting messages, eye contact, and micro-gestures, had problems with either inaccurate information transmission or leakage of interaction intentions. To this end, a novel gesture-free hand intention recognition scheme was proposed, which adopted surface electromyography (sEMG) and isometric contraction theory to recognize different hand intentions without any gesture. Specifically, this work includes four parts: (1) the experimental system, consisting of the upper computer software, self-conducted myoelectric watch, and sports platform, is built to get sEMG signals and simulate multiple usage scenarios; (2) the paradigm is designed to standardize prompting and collect the gesture-free sEMG datasets. Eight-channel signals of ten subjects were recorded twice per subject at intervals of about 5-10 days; (3) the toolbox integrates preprocessing methods (data segmentation, filter, normalization, etc.), commonly used sEMG signal decoding methods, and various plotting functions, to facilitate the research of the dataset; (4) the benchmark results of widely used methods are provided. The results involve single-day, cross-day, and cross-subject experiments of 6-class and 12-class gesture-free hand intention while subjects perform different sports motions. To help future research, all data, hardware, software, and methods are open-sourced on the following website: click here. 
</p> </div> </dd> <dt> <a name='item242'>[242]</a> <a href ="/abs/2411.14133" title="Abstract" id="2411.14133"> arXiv:2411.14133 </a> [<a href="/pdf/2411.14133" title="Download PDF" id="pdf-2411.14133" aria-labelledby="pdf-2411.14133">pdf</a>, <a href="https://arxiv.org/html/2411.14133v1" title="View HTML" id="html-2411.14133" aria-labelledby="html-2411.14133" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14133" title="Other formats" id="oth-2411.14133" aria-labelledby="oth-2411.14133">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> GASP: Efficient Black-Box Generation of Adversarial Suffixes for Jailbreaking LLMs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Basani,+A+R">Advik Raj Basani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+X">Xiao Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 28 pages, 9 tables, 13 figures; under review at CVPR '25 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Cryptography and Security (cs.CR); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Large Language Models (LLMs) have shown impressive proficiency across a range of natural language processing tasks yet remain vulnerable to adversarial prompts, known as jailbreak attacks, carefully designed to elicit harmful responses from LLMs. Traditional methods rely on manual heuristics, which suffer from limited generalizability. While being automatic, optimization-based attacks often produce unnatural jailbreak prompts that are easy to detect by safety filters or require high computational overhead due to discrete token optimization. 
Witnessing the limitations of existing jailbreak methods, we introduce Generative Adversarial Suffix Prompter (GASP), a novel framework that combines human-readable prompt generation with Latent Bayesian Optimization (LBO) to improve adversarial suffix creation in a fully black-box setting. GASP leverages LBO to craft adversarial suffixes by efficiently exploring continuous embedding spaces, gradually optimizing the model to improve attack efficacy while balancing prompt coherence through a targeted iterative refinement procedure. Our experiments show that GASP can generate natural jailbreak prompts, significantly improving attack success rates, reducing training times, and accelerating inference speed, thus making it an efficient and scalable solution for red-teaming LLMs. </p> </div> </dd> <dt> <a name='item243'>[243]</a> <a href ="/abs/2411.14137" title="Abstract" id="2411.14137"> arXiv:2411.14137 </a> [<a href="/pdf/2411.14137" title="Download PDF" id="pdf-2411.14137" aria-labelledby="pdf-2411.14137">pdf</a>, <a href="https://arxiv.org/html/2411.14137v1" title="View HTML" id="html-2411.14137" aria-labelledby="html-2411.14137" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14137" title="Other formats" id="oth-2411.14137" aria-labelledby="oth-2411.14137">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Visual Contexts Clarify Ambiguous Expressions: A Benchmark Dataset </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nam,+H">Heejeong Nam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ahn,+J">Jinwoo Ahn</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> The ability to perform complex reasoning across multimodal inputs is essential for models 
to effectively interact with humans in real-world scenarios. Advancements in vision-language models have significantly improved performance on tasks that require processing explicit and direct textual inputs, such as Visual Question Answering (VQA) and Visual Grounding (VG). However, less attention has been given to improving the model capabilities to comprehend nuanced and ambiguous forms of communication. This presents a critical challenge, as human language in real-world interactions often conveys hidden intentions that rely on context for accurate interpretation. To address this gap, we propose VAGUE, a multimodal benchmark comprising 3.9K indirect human utterances paired with corresponding scenes. Additionally, we contribute a model-based pipeline for generating prompt-solution pairs from input images. Our work aims to delve deeper into the ability of models to understand indirect communication and seeks to contribute to the development of models capable of more refined and human-like interactions. Extensive evaluation on multiple VLMs reveals that mainstream models still struggle with indirect communication when required to perform complex linguistic and visual reasoning. We release our code and data at <a href="https://github.com/Hazel-Heejeong-Nam/VAGUE.git" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item244'>[244]</a> <a href ="/abs/2411.14141" title="Abstract" id="2411.14141"> arXiv:2411.14141 </a> [<a href="/pdf/2411.14141" title="Download PDF" id="pdf-2411.14141" aria-labelledby="pdf-2411.14141">pdf</a>, <a href="https://arxiv.org/html/2411.14141v1" title="View HTML" id="html-2411.14141" aria-labelledby="html-2411.14141" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14141" title="Other formats" id="oth-2411.14141" aria-labelledby="oth-2411.14141">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Differentiable SVD based on Moore-Penrose Pseudoinverse for Inverse Imaging Problems </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Zhang,+Y">Yinghao Zhang</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Hu,+Y">Yue Hu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Low-rank regularization-based deep unrolling networks have achieved remarkable success in various inverse imaging problems (IIPs). However, the singular value decomposition (SVD) is non-differentiable when duplicated singular values occur, leading to severe numerical instability during training. In this paper, we propose a differentiable SVD based on the Moore-Penrose pseudoinverse to address this issue. To the best of our knowledge, this is the first work to provide a comprehensive analysis of the differentiability of the trivial SVD. Specifically, we show that the non-differentiability of SVD is essentially due to an underdetermined system of linear equations arising in the derivation process. 
We utilize the Moore-Penrose pseudoinverse to solve the system, thereby proposing a differentiable SVD. A numerical stability analysis in the context of IIPs is provided. Experimental results in color image compressed sensing and dynamic MRI reconstruction show that our proposed differentiable SVD can effectively address the numerical instability issue while ensuring computational precision. Code is available at <a href="https://github.com/yhao-z/SVD-inv" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item245'>[245]</a> <a href ="/abs/2411.14151" title="Abstract" id="2411.14151"> arXiv:2411.14151 </a> [<a href="/pdf/2411.14151" title="Download PDF" id="pdf-2411.14151" aria-labelledby="pdf-2411.14151">pdf</a>, <a href="https://arxiv.org/html/2411.14151v1" title="View HTML" id="html-2411.14151" aria-labelledby="html-2411.14151" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14151" title="Other formats" id="oth-2411.14151" aria-labelledby="oth-2411.14151">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Error Analysis of the Deep Mixed Residual Method for High-order Elliptic Equations </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Bai,+M">Mengjia Bai</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Chen,+J">Jingrun Chen</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Du,+R">Rui Du</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Sun,+Z">Zhiwei Sun</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 40 pages, none figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span> </div> <p class='mathjax'> This paper presents an a priori error analysis of the Deep Mixed Residual method 
(MIM) for solving high-order elliptic equations with non-homogeneous boundary conditions, including Dirichlet, Neumann, and Robin conditions. We examine MIM with two types of loss functions, referred to as first-order and second-order least squares systems. By providing boundedness and coercivity analysis, we leverage Céa's Lemma to decompose the total error into the approximation, generalization, and optimization errors. Utilizing the Barron space theory and Rademacher complexity, an a priori error is derived regarding the training samples and network size that are exempt from the curse of dimensionality. Our results reveal that MIM significantly reduces the regularity requirements for activation functions compared to the deep Ritz method, implying the effectiveness of MIM in solving high-order equations. </p> </div> </dd> <dt> <a name='item246'>[246]</a> <a href ="/abs/2411.14155" title="Abstract" id="2411.14155"> arXiv:2411.14155 </a> [<a href="/pdf/2411.14155" title="Download PDF" id="pdf-2411.14155" aria-labelledby="pdf-2411.14155">pdf</a>, <a href="https://arxiv.org/html/2411.14155v1" title="View HTML" id="html-2411.14155" aria-labelledby="html-2411.14155" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14155" title="Other formats" id="oth-2411.14155" aria-labelledby="oth-2411.14155">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Grand Challenges in the Verification of Autonomous Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Leahy,+K">Kevin Leahy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Asgari,+H">Hamid Asgari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dennis,+L+A">Louise A. Dennis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Feather,+M+S">Martin S. 
Feather</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fisher,+M">Michael Fisher</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ibanez-Guzman,+J">Javier Ibanez-Guzman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Logan,+B">Brian Logan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Olszewska,+J+I">Joanna I. Olszewska</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Redfield,+S">Signe Redfield</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Autonomous systems use independent decision-making with only limited human intervention to accomplish goals in complex and unpredictable environments. As the autonomy technologies that underpin them continue to advance, these systems will find their way into an increasing number of applications in an ever wider range of settings. If we are to deploy them to perform safety-critical or mission-critical roles, it is imperative that we have justified confidence in their safe and correct operation. Verification is the process by which such confidence is established. However, autonomous systems pose challenges to existing verification practices. This paper highlights viewpoints of the Roadmap Working Group of the IEEE Robotics and Automation Society Technical Committee for Verification of Autonomous Systems, identifying these grand challenges, and providing a vision for future research efforts that will be needed to address them. 
</p> </div> </dd> <dt> <a name='item247'>[247]</a> <a href ="/abs/2411.14158" title="Abstract" id="2411.14158"> arXiv:2411.14158 </a> [<a href="/pdf/2411.14158" title="Download PDF" id="pdf-2411.14158" aria-labelledby="pdf-2411.14158">pdf</a>, <a href="https://arxiv.org/html/2411.14158v1" title="View HTML" id="html-2411.14158" aria-labelledby="html-2411.14158" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14158" title="Other formats" id="oth-2411.14158" aria-labelledby="oth-2411.14158">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Point Cloud Denoising With Fine-Granularity Dynamic Graph Convolutional Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+W">Wenqiang Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dai,+W">Wenrui Dai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xue,+D">Duoduo Xue</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+Z">Ziyang Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+C">Chenglin Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zou,+J">Junni Zou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiong,+H">Hongkai Xiong</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Due to limitations in acquisition equipment, noise perturbations often corrupt 3-D point clouds, hindering down-stream tasks such as surface reconstruction, rendering, and further processing. Existing 3-D point cloud denoising methods typically fail to reliably fit the underlying continuous surface, resulting in a degradation of reconstruction performance. 
This paper introduces fine-granularity dynamic graph convolutional networks called GD-GCN, a novel approach to denoising in 3-D point clouds. The GD-GCN employs micro-step temporal graph convolution (MST-GConv) to perform feature learning in a gradual manner. Compared with the conventional GCN, which commonly uses discrete integer-step graph convolution, this modification introduces a more adaptable and nuanced approach to feature learning within graph convolution networks. It more accurately depicts the process of fitting the point cloud with noise to the underlying surface. The learning process for MST-GConv acts like a changing system and is managed through a type of neural network known as neural Partial Differential Equations (PDEs). This means it can adapt and improve over time. GD-GCN approximates the Riemannian metric, calculating distances between points along a low-dimensional manifold. This capability allows it to understand the local geometric structure and effectively capture diverse relationships between points from different geometric regions through geometric graph construction based on Riemannian distances. Additionally, GD-GCN incorporates robust graph spectral filters based on the Bernstein polynomial approximation, which modulate eigenvalues for complex and arbitrary spectral responses, providing theoretical guarantees for BIBO stability. Symmetric channel mixing matrices further enhance filter flexibility by enabling channel-level scaling and shifting in the spectral domain. 
</p> </div> </dd> <dt> <a name='item248'>[248]</a> <a href ="/abs/2411.14162" title="Abstract" id="2411.14162"> arXiv:2411.14162 </a> [<a href="/pdf/2411.14162" title="Download PDF" id="pdf-2411.14162" aria-labelledby="pdf-2411.14162">pdf</a>, <a href="/format/2411.14162" title="Other formats" id="oth-2411.14162" aria-labelledby="oth-2411.14162">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Verification of Behavior Trees with Contingency Monitors </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Serbinowska,+S+S">Serena S. Serbinowska</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Potteiger,+N">Nicholas Potteiger</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tumlin,+A+M">Anne M. Tumlin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Johnson,+T+T">Taylor T. Johnson</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 56-72 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Behavior Trees (BTs) are high level controllers that have found use in a wide range of robotics tasks. As they grow in popularity and usage, it is crucial to ensure that the appropriate tools and methods are available for ensuring they work as intended. To that end, we created a new methodology by which to create Runtime Monitors for BTs. These monitors can be used by the BT to correct when undesirable behavior is detected and are capable of handling LTL specifications. 
We demonstrate that in terms of runtime, the generated monitors are on par with monitors generated by existing tools and highlight certain features that make our method more desirable in various situations. We note that our method allows for our monitors to be swapped out with alternate monitors with fairly minimal user effort. Finally, our method ties in with our existing tool, BehaVerify, allowing for the verification of BTs with monitors. </p> </div> </dd> <dt> <a name='item249'>[249]</a> <a href ="/abs/2411.14163" title="Abstract" id="2411.14163"> arXiv:2411.14163 </a> [<a href="/pdf/2411.14163" title="Download PDF" id="pdf-2411.14163" aria-labelledby="pdf-2411.14163">pdf</a>, <a href="/format/2411.14163" title="Other formats" id="oth-2411.14163" aria-labelledby="oth-2411.14163">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Creating a Formally Verified Neural Network for Autonomous Navigation: An Experience Report </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bukhari,+S+A+A">Syed Ali Asadullah Bukhari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Flinkow,+T">Thomas Flinkow</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Inkarbekov,+M">Medet Inkarbekov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pearlmutter,+B+A">Barak A. Pearlmutter</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Monahan,+R">Rosemary Monahan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 
178-190 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Logic in Computer Science (cs.LO)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> The increased reliance of self-driving vehicles on neural networks opens up the challenge of their verification. In this paper we present an experience report, describing a case study which we undertook to explore the design and training of a neural network on a custom dataset for vision-based autonomous navigation. We are particularly interested in the use of machine learning with differentiable logics to obtain networks satisfying basic safety properties by design, guaranteeing the behaviour of the neural network after training. We motivate the choice of a suitable neural network verifier for our purposes and report our observations on the use of neural network verifiers for self-driving systems. </p> </div> </dd> <dt> <a name='item250'>[250]</a> <a href ="/abs/2411.14164" title="Abstract" id="2411.14164"> arXiv:2411.14164 </a> [<a href="/pdf/2411.14164" title="Download PDF" id="pdf-2411.14164" aria-labelledby="pdf-2411.14164">pdf</a>, <a href="https://arxiv.org/html/2411.14164v1" title="View HTML" id="html-2411.14164" aria-labelledby="html-2411.14164" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14164" title="Other formats" id="oth-2411.14164" aria-labelledby="oth-2411.14164">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FoPru: Focal Pruning for Efficient Large Vision-Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+L">Lei Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+W">Weizhe Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+T">Tongxuan Liu</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+Y">Yuting Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+J">Jing Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+L">Lechao Cheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+X">Xiaohua Xu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages, 7 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Large Vision-Language Models (LVLMs) represent a significant advancement toward achieving superior multimodal capabilities by enabling powerful Large Language Models (LLMs) to understand visual input. Typically, LVLMs utilize visual encoders, such as CLIP, to transform images into visual tokens, which are then aligned with textual tokens through projection layers before being input into the LLM for inference. Although existing LVLMs have achieved significant success, their inference efficiency is still limited by the substantial number of visual tokens and the potential redundancy among them. To mitigate this issue, we propose Focal Pruning (FoPru), a training-free method that prunes visual tokens based on the attention-based token significance derived from the vision encoder. Specifically, we introduce two alternative pruning strategies: 1) the rank strategy, which leverages all token significance scores to retain more critical tokens in a global view; 2) the row strategy, which focuses on preserving continuous key information in images from a local perspective. Finally, the selected tokens are reordered to maintain their original positional relationships. 
Extensive experiments across various LVLMs and multimodal datasets demonstrate that our method can prune a large number of redundant tokens while maintaining high accuracy, leading to significant improvements in inference efficiency. </p> </div> </dd> <dt> <a name='item251'>[251]</a> <a href ="/abs/2411.14165" title="Abstract" id="2411.14165"> arXiv:2411.14165 </a> [<a href="/pdf/2411.14165" title="Download PDF" id="pdf-2411.14165" aria-labelledby="pdf-2411.14165">pdf</a>, <a href="/format/2411.14165" title="Other formats" id="oth-2411.14165" aria-labelledby="oth-2411.14165">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Formalizing Stateful Behavior Trees </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Serbinowska,+S+S">Serena S. Serbinowska</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Robinette,+P">Preston Robinette</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Karsai,+G">Gabor Karsai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Johnson,+T+T">Taylor T. Johnson</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 201-218 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Behavior Trees (BTs) are high-level controllers that are useful in a variety of planning tasks and are gaining traction in robotic mission planning. As they gain popularity in safety-critical domains, it is important to formalize their syntax and semantics, as well as verify properties for them. 
In this paper, we formalize a class of BTs we call Stateful Behavior Trees (SBTs) that have auxiliary variables and operate in an environment that can change over time. SBTs have access to persistent shared memory (often known as a blackboard) that keeps track of these auxiliary variables. We demonstrate that SBTs are equivalent in computational power to Turing Machines when the blackboard can store mathematical (i.e., unbounded) integers. We further identify syntactic assumptions where SBTs have computational power equivalent to finite state automata, specifically where the auxiliary variables are of finitary types. We present a domain specific language (DSL) for writing SBTs and adapt the tool BehaVerify for use with this DSL. This new DSL in BehaVerify supports interfacing with popular BT libraries in Python, and also provides generation of Haskell code and nuXmv models, the latter of which is used for model checking temporal logic specifications for the SBTs. We include examples and scalability results where BehaVerify outperforms another verification tool by a factor of 100. 
</p> </div> </dd> <dt> <a name='item252'>[252]</a> <a href ="/abs/2411.14168" title="Abstract" id="2411.14168"> arXiv:2411.14168 </a> [<a href="/pdf/2411.14168" title="Download PDF" id="pdf-2411.14168" aria-labelledby="pdf-2411.14168">pdf</a>, <a href="/format/2411.14168" title="Other formats" id="oth-2411.14168" aria-labelledby="oth-2411.14168">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Autonomous System Safety Properties with Multi-Machine Hybrid Event-B </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Banach,+R">Richard Banach</a> (University of Manchester, UK)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 1-19 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Multiagent Systems (cs.MA)</span> </div> <p class='mathjax'> Event-B is a well known methodology for the verified design and development of systems that can be characterised as discrete transition systems. Hybrid Event-B is a conservative extension that interleaves the discrete transitions of Event-B (assumed to be temporally isolated) with episodes of continuously varying state change. While a single Hybrid Event-B machine is sufficient for applications with a single locus of control, it will not do for autonomous systems, which have several loci of control by default. Multi-machine Hybrid Event-B is designed to allow the specification of systems with several loci of control. The formalism is succinctly surveyed, pointing out the subtle semantic issues involved. 
The multi-machine formalism is then used to specify a relatively simple incident response system, involving a controller, two drones and three responders, working in a partly coordinated and partly independent fashion to manage a putative hazardous scenario. </p> </div> </dd> <dt> <a name='item253'>[253]</a> <a href ="/abs/2411.14169" title="Abstract" id="2411.14169"> arXiv:2411.14169 </a> [<a href="/pdf/2411.14169" title="Download PDF" id="pdf-2411.14169" aria-labelledby="pdf-2411.14169">pdf</a>, <a href="https://arxiv.org/html/2411.14169v1" title="View HTML" id="html-2411.14169" aria-labelledby="html-2411.14169" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14169" title="Other formats" id="oth-2411.14169" aria-labelledby="oth-2411.14169">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Spatiotemporal Decoupling for Efficient Vision-Based Occupancy Forecasting </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+J">Jingyi Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+X">Xieyuanli Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+J">Junyi Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+J">Jiawei Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+J">Jintao Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yue Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pei,+L">Ling Pei</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> The task of occupancy forecasting (OCF) involves utilizing past and present perception data to predict future occupancy states of autonomous vehicle surrounding environments, which is critical for downstream tasks such as 
obstacle avoidance and path planning. Existing 3D OCF approaches struggle to predict plausible spatial details for movable objects and suffer from slow inference speeds due to neglecting the bias and uneven distribution of changing occupancy states in both space and time. In this paper, we propose a novel spatiotemporal decoupling vision-based paradigm to explicitly tackle the bias and achieve both effective and efficient 3D OCF. To tackle spatial bias in empty areas, we introduce a novel spatial representation that decouples the conventional dense 3D format into 2D bird's-eye view (BEV) occupancy with corresponding height values, enabling 3D OCF derived only from 2D predictions thus enhancing efficiency. To reduce temporal bias on static voxels, we design temporal decoupling to improve end-to-end OCF by temporally associating instances via predicted flows. We develop an efficient multi-head network EfficientOCF to achieve 3D OCF with our devised spatiotemporally decoupled representation. A new metric, conditional IoU (C-IoU), is also introduced to provide a robust 3D OCF performance assessment, especially in datasets with missing or incomplete annotations. The experimental results demonstrate that EfficientOCF surpasses existing baseline methods on accuracy and efficiency, achieving state-of-the-art performance with a fast inference time of 82.33ms with a single GPU. Our code will be released as open source. 
</p> </div> </dd> <dt> <a name='item254'>[254]</a> <a href ="/abs/2411.14174" title="Abstract" id="2411.14174"> arXiv:2411.14174 </a> [<a href="/pdf/2411.14174" title="Download PDF" id="pdf-2411.14174" aria-labelledby="pdf-2411.14174">pdf</a>, <a href="/format/2411.14174" title="Other formats" id="oth-2411.14174" aria-labelledby="oth-2411.14174">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Translating C To Rust: Lessons from a User Study </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+R">Ruishi Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+B">Bo Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+T">Tianyu Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Saxena,+P">Prateek Saxena</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kundu,+A">Ashish Kundu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by NDSS Symposium 2025. Please cite the conference version of this paper, e.g., "Ruishi Li, Bo Wang, Tianyu Li, Prateek Saxena, Ashish Kundu. Translating C To Rust: Lessons from a User Study. In 32nd Annual Network and Distributed System Security Symposium (NDSS 2025)." </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span>; Cryptography and Security (cs.CR); Programming Languages (cs.PL) </div> <p class='mathjax'> Rust aims to offer full memory safety for programs, a guarantee that untamed C programs do not enjoy. How difficult is it to translate existing C code to Rust? To get a complementary view from that of automatic C to Rust translators, we report on a user study asking humans to translate real-world C programs to Rust. 
Our participants are able to produce safe Rust translations, whereas state-of-the-art automatic tools are not able to do so. Our analysis highlights that the high-level strategy taken by users departs significantly from those of automatic tools we study. We also find that users often choose zero-cost (static) abstractions for temporal safety, which addresses a predominant component of runtime costs in other full memory safety defenses. User-provided translations showcase a rich landscape of specialized strategies to translate the same C program in different ways to safe Rust, which future automatic translators can consider. </p> </div> </dd> <dt> <a name='item255'>[255]</a> <a href ="/abs/2411.14179" title="Abstract" id="2411.14179"> arXiv:2411.14179 </a> [<a href="/pdf/2411.14179" title="Download PDF" id="pdf-2411.14179" aria-labelledby="pdf-2411.14179">pdf</a>, <a href="https://arxiv.org/html/2411.14179v1" title="View HTML" id="html-2411.14179" aria-labelledby="html-2411.14179" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14179" title="Other formats" id="oth-2411.14179" aria-labelledby="oth-2411.14179">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CompetitorFormer: Competitor Transformer for 3D Instance Segmentation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+D">Duanchu Wang</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+J">Jing Liu</a> (2), <a href="https://arxiv.org/search/cs?searchtype=author&query=Gong,+H">Haoran Gong</a> (2), <a href="https://arxiv.org/search/cs?searchtype=author&query=Quan,+Y">Yinghui Quan</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+D">Di Wang</a> (2) ((1) School of Electronic Engineering, Xidian University (2) School of Software Engineering, Xian Jiaotong University)</div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Transformer-based methods have become the dominant approach for 3D instance segmentation. These methods predict instance masks via instance queries, ranking them by classification confidence and IoU scores to select the top prediction as the final outcome. However, it has been observed that the current models employ a fixed and higher number of queries than the instances present within a scene. In such instances, multiple queries predict the same instance, yet only a single query is ultimately optimized. The close scores of queries in the lower-level decoders make it challenging for the dominant query to distinguish itself rapidly, which ultimately impairs the model's accuracy and convergence efficiency. This phenomenon is referred to as inter-query competition. To address this challenge, we put forth a series of plug-and-play competition-oriented designs, collectively designated as the CompetitorFormer, with the aim of reducing competition and facilitating a dominant query. Experiments showed that integrating our designs with state-of-the-art frameworks consistently resulted in significant performance improvements in 3D instance segmentation across a range of datasets. 
</p> </div> </dd> <dt> <a name='item256'>[256]</a> <a href ="/abs/2411.14193" title="Abstract" id="2411.14193"> arXiv:2411.14193 </a> [<a href="/pdf/2411.14193" title="Download PDF" id="pdf-2411.14193" aria-labelledby="pdf-2411.14193">pdf</a>, <a href="https://arxiv.org/html/2411.14193v1" title="View HTML" id="html-2411.14193" aria-labelledby="html-2411.14193" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14193" title="Other formats" id="oth-2411.14193" aria-labelledby="oth-2411.14193">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ComfyGI: Automatic Improvement of Image Generation Workflows </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sobania,+D">Dominik Sobania</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Briesch,+M">Martin Briesch</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rothlauf,+F">Franz Rothlauf</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Neural and Evolutionary Computing (cs.NE) </div> <p class='mathjax'> Automatic image generation is no longer just of interest to researchers, but also to practitioners. However, current models are sensitive to the settings used and automatic optimization methods often require human involvement. To bridge this gap, we introduce ComfyGI, a novel approach to automatically improve workflows for image generation without the need for human intervention driven by techniques from genetic improvement. This enables image generation with significantly higher quality in terms of the alignment with the given description and the perceived aesthetics. 
On the performance side, we find that overall, the images generated with an optimized workflow are about 50% better compared to the initial workflow in terms of the median ImageReward score. These already good results are even surpassed in our human evaluation, as the participants preferred the images improved by ComfyGI in around 90% of the cases. </p> </div> </dd> <dt> <a name='item257'>[257]</a> <a href ="/abs/2411.14198" title="Abstract" id="2411.14198"> arXiv:2411.14198 </a> [<a href="/pdf/2411.14198" title="Download PDF" id="pdf-2411.14198" aria-labelledby="pdf-2411.14198">pdf</a>, <a href="https://arxiv.org/html/2411.14198v1" title="View HTML" id="html-2411.14198" aria-labelledby="html-2411.14198" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14198" title="Other formats" id="oth-2411.14198" aria-labelledby="oth-2411.14198">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Why do language models perform worse for morphologically complex languages? </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Arnett,+C">Catherine Arnett</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bergen,+B+K">Benjamin K. Bergen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> Language models perform differently across languages. It has been previously suggested that morphological typology may explain some of this variability (Cotterell et al., 2018). 
We replicate previous analyses and find additional new evidence for a performance gap between agglutinative and fusional languages, where fusional languages, such as English, tend to have better language modeling performance than morphologically more complex languages like Turkish. We then propose and test three possible causes for this performance gap: morphological alignment of tokenizers, tokenization quality, and disparities in dataset sizes and measurement. To test the morphological alignment hypothesis, we present MorphScore, a tokenizer evaluation metric, and supporting datasets for 22 languages. We find some evidence that tokenization quality explains the performance gap, but none for the role of morphological alignment. Instead we find that the performance gap is most reduced when training datasets are of equivalent size across language types, but only when scaled according to the so-called "byte-premium" -- the different encoding efficiencies of different languages and orthographies. These results suggest that no language is harder or easier for a language model to learn on the basis of its morphological typology. Differences in performance can be attributed to disparities in dataset size. These results bear on ongoing efforts to improve performance for low-performing and under-resourced languages. 
</p> </div> </dd> <dt> <a name='item258'>[258]</a> <a href ="/abs/2411.14199" title="Abstract" id="2411.14199"> arXiv:2411.14199 </a> [<a href="/pdf/2411.14199" title="Download PDF" id="pdf-2411.14199" aria-labelledby="pdf-2411.14199">pdf</a>, <a href="https://arxiv.org/html/2411.14199v1" title="View HTML" id="html-2411.14199" aria-labelledby="html-2411.14199" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14199" title="Other formats" id="oth-2411.14199" aria-labelledby="oth-2411.14199">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> OpenScholar: Synthesizing Scientific Literature with Retrieval-augmented LMs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Asai,+A">Akari Asai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+J">Jacqueline He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shao,+R">Rulin Shao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+W">Weijia Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Singh,+A">Amanpreet Singh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+J+C">Joseph Chee Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lo,+K">Kyle Lo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Soldaini,+L">Luca Soldaini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Feldman,+S">Sergey Feldman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=D'arcy,+M">Mike D'arcy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wadden,+D">David Wadden</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Latzke,+M">Matt Latzke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tian,+M">Minyang Tian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ji,+P">Pan Ji</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+S">Shengyan Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tong,+H">Hao Tong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+B">Bohao Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiong,+Y">Yanyu Xiong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zettlemoyer,+L">Luke Zettlemoyer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Neubig,+G">Graham Neubig</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Weld,+D">Dan Weld</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Downey,+D">Doug Downey</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yih,+W">Wen-tau Yih</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Koh,+P+W">Pang Wei Koh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hajishirzi,+H">Hannaneh Hajishirzi</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Digital Libraries (cs.DL); Information Retrieval (cs.IR); Machine Learning (cs.LG) </div> <p class='mathjax'> Scientific progress depends on researchers' ability to synthesize the growing body of literature. Can large language models (LMs) assist scientists in this task? We introduce OpenScholar, a specialized retrieval-augmented LM that answers scientific queries by identifying relevant passages from 45 million open-access papers and synthesizing citation-backed responses. To evaluate OpenScholar, we develop ScholarQABench, the first large-scale multi-domain benchmark for literature search, comprising 2,967 expert-written queries and 208 long-form answers across computer science, physics, neuroscience, and biomedicine. 
On ScholarQABench, OpenScholar-8B outperforms GPT-4o by 5% and PaperQA2 by 7% in correctness, despite being a smaller, open model. While GPT4o hallucinates citations 78 to 90% of the time, OpenScholar achieves citation accuracy on par with human experts. OpenScholar's datastore, retriever, and self-feedback inference loop also improves off-the-shelf LMs: for instance, OpenScholar-GPT4o improves GPT-4o's correctness by 12%. In human evaluations, experts preferred OpenScholar-8B and OpenScholar-GPT4o responses over expert-written ones 51% and 70% of the time, respectively, compared to GPT4o's 32%. We open-source all of our code, models, datastore, data and a public demo. </p> </div> </dd> <dt> <a name='item259'>[259]</a> <a href ="/abs/2411.14201" title="Abstract" id="2411.14201"> arXiv:2411.14201 </a> [<a href="/pdf/2411.14201" title="Download PDF" id="pdf-2411.14201" aria-labelledby="pdf-2411.14201">pdf</a>, <a href="https://arxiv.org/html/2411.14201v1" title="View HTML" id="html-2411.14201" aria-labelledby="html-2411.14201" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14201" title="Other formats" id="oth-2411.14201" aria-labelledby="oth-2411.14201">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Regional Attention for Shadow Removal </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+H">Hengxing Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+M">Mingjia Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+X">Xiaojie Guo</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Shadow, as a natural consequence of light interacting with objects, plays a crucial role in shaping the aesthetics of an image, which however also impairs the content 
visibility and overall visual quality. Recent shadow removal approaches employ the mechanism of attention, due to its effectiveness, as a key component. However, they often suffer from two issues including large model size and high computational complexity for practical use. To address these shortcomings, this work devises a lightweight yet accurate shadow removal framework. First, we analyze the characteristics of the shadow removal task to seek the key information required for reconstructing shadow regions and designing a novel regional attention mechanism to effectively capture such information. Then, we customize a Regional Attention Shadow Removal Model (RASM, in short), which leverages non-shadow areas to assist in restoring shadow ones. Unlike existing attention-based models, our regional attention strategy allows each shadow region to interact more rationally with its surrounding non-shadow areas, for seeking the regional contextual correlation between shadow and non-shadow areas. Extensive experiments are conducted to demonstrate that our proposed method delivers superior performance over other state-of-the-art models in terms of accuracy and efficiency, making it appealing for practical applications. 
</p> </div> </dd> <dt> <a name='item260'>[260]</a> <a href ="/abs/2411.14202" title="Abstract" id="2411.14202"> arXiv:2411.14202 </a> [<a href="/pdf/2411.14202" title="Download PDF" id="pdf-2411.14202" aria-labelledby="pdf-2411.14202">pdf</a>, <a href="https://arxiv.org/html/2411.14202v1" title="View HTML" id="html-2411.14202" aria-labelledby="html-2411.14202" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14202" title="Other formats" id="oth-2411.14202" aria-labelledby="oth-2411.14202">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Revised Regularization for Efficient Continual Learning through Correlation-Based Parameter Update in Bayesian Neural Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Palit,+S">Sanchar Palit</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Banerjee,+B">Biplab Banerjee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chaudhuri,+S">Subhasis Chaudhuri</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> at ICVGIP 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> We propose a Bayesian neural network-based continual learning algorithm using Variational Inference, aiming to overcome several drawbacks of existing methods. Specifically, in continual learning scenarios, storing network parameters at each step to retain knowledge poses challenges. This is compounded by the crucial need to mitigate catastrophic forgetting, particularly given the limited access to past datasets, which complicates maintaining correspondence between network parameters and datasets across all sessions. 
Current methods using Variational Inference with KL divergence risk catastrophic forgetting during uncertain node updates and coupled disruptions in certain nodes. To address these challenges, we propose the following strategies. To reduce the storage of the dense layer parameters, we propose a parameter distribution learning method that significantly reduces the storage requirements. In the continual learning framework employing variational inference, our study introduces a regularization term that specifically targets the dynamics and population of the mean and variance of the parameters. This term aims to retain the benefits of KL divergence while addressing related challenges. To ensure proper correspondence between network parameters and the data, our method introduces an importance-weighted Evidence Lower Bound term to capture data and parameter correlations. This enables storage of common and distinctive parameter hyperspace bases. The proposed method partitions the parameter space into common and distinctive subspaces, with conditions for effective backward and forward knowledge transfer, elucidating the network-parameter dataset correspondence. The experimental results demonstrate the effectiveness of our method across diverse datasets and various combinations of sequential datasets, yielding superior performance compared to existing approaches. 
</p> </div> </dd> <dt> <a name='item261'>[261]</a> <a href ="/abs/2411.14205" title="Abstract" id="2411.14205"> arXiv:2411.14205 </a> [<a href="/pdf/2411.14205" title="Download PDF" id="pdf-2411.14205" aria-labelledby="pdf-2411.14205">pdf</a>, <a href="https://arxiv.org/html/2411.14205v1" title="View HTML" id="html-2411.14205" aria-labelledby="html-2411.14205" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14205" title="Other formats" id="oth-2411.14205" aria-labelledby="oth-2411.14205">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Is this Generated Person Existed in Real-world? Fine-grained Detecting and Calibrating Abnormal Human-body </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zeqing Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+Q">Qingyang Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wan,+W">Wentao Wan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+H">Haojie Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+K">Keze Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tian,+Y">Yonghong Tian</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages, 14 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Recent improvements in visual synthesis have significantly enhanced the depiction of generated human photos, which are pivotal due to their wide applicability and demand. Nonetheless, the existing text-to-image or text-to-video models often generate low-quality human photos that might differ considerably from real-world body structures, referred to as "abnormal human bodies". 
Such abnormalities, typically deemed unacceptable, pose considerable challenges in the detection and repair of them within human photos. These challenges require precise abnormality recognition capabilities, which entail pinpointing both the location and the abnormality type. Intuitively, Visual Language Models (VLMs) that have obtained remarkable performance on various visual tasks are quite suitable for this task. However, their performance on abnormality detection in human photos is quite poor. Hence, it is quite important to highlight this task for the research community. In this paper, we first introduce a simple yet challenging task, i.e., Fine-grained Human-body Abnormality Detection (FHAD), and construct two high-quality datasets for evaluation. Then, we propose a meticulous framework, named HumanCalibrator, which identifies and repairs abnormalities in human body structures while preserving the other content. Experiments indicate that our HumanCalibrator achieves high accuracy in abnormality detection and accomplishes an increase in visual comparisons while preserving the other visual content. 
</p> </div> </dd> <dt> <a name='item262'>[262]</a> <a href ="/abs/2411.14207" title="Abstract" id="2411.14207"> arXiv:2411.14207 </a> [<a href="/pdf/2411.14207" title="Download PDF" id="pdf-2411.14207" aria-labelledby="pdf-2411.14207">pdf</a>, <a href="https://arxiv.org/html/2411.14207v1" title="View HTML" id="html-2411.14207" aria-labelledby="html-2411.14207" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14207" title="Other formats" id="oth-2411.14207" aria-labelledby="oth-2411.14207">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> HARP: A Large-Scale Higher-Order Ambisonic Room Impulse Response Dataset </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Saini,+S">Shivam Saini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peissig,+J">Jürgen Peissig</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to ICASSP 2025 Workshop Dataset and code to be uploaded at: <a href="https://github.com/whojavumusic/HARP" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Multimedia (cs.MM); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> This contribution introduces a dataset of 7th-order Ambisonic Room Impulse Responses (HOA-RIRs), created using the Image Source Method. By employing higher-order Ambisonics, our dataset enables precise spatial audio reproduction, a critical requirement for realistic immersive audio applications. Leveraging the virtual simulation, we present a unique microphone configuration, based on the superposition principle, designed to optimize sound field coverage while addressing the limitations of traditional microphone arrays. 
The presented 64-microphone configuration allows us to capture RIRs directly in the Spherical Harmonics domain. The dataset features a wide range of room configurations, encompassing variations in room geometry, acoustic absorption materials, and source-receiver distances. A detailed description of the simulation setup is provided alongside for an accurate reproduction. The dataset serves as a vital resource for researchers working on spatial audio, particularly in applications involving machine learning to improve room acoustics modeling and sound field synthesis. It further provides a very high level of spatial resolution and realism crucial for tasks such as source localization, reverberation prediction, and immersive sound reproduction. </p> </div> </dd> <dt> <a name='item263'>[263]</a> <a href ="/abs/2411.14208" title="Abstract" id="2411.14208"> arXiv:2411.14208 </a> [<a href="/pdf/2411.14208" title="Download PDF" id="pdf-2411.14208" aria-labelledby="pdf-2411.14208">pdf</a>, <a href="https://arxiv.org/html/2411.14208v1" title="View HTML" id="html-2411.14208" aria-labelledby="html-2411.14208" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14208" title="Other formats" id="oth-2411.14208" aria-labelledby="oth-2411.14208">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Novel View Extrapolation with Video Diffusion Priors </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+K">Kunhao Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shao,+L">Ling Shao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+S">Shijian Lu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> The field of novel view synthesis has made significant strides thanks to the development of 
radiance field methods. However, most radiance field techniques are far better at novel view interpolation than novel view extrapolation where the synthesized novel views are far beyond the observed training views. We design ViewExtrapolator, a novel view synthesis approach that leverages the generative priors of Stable Video Diffusion (SVD) for realistic novel view extrapolation. By redesigning the SVD denoising process, ViewExtrapolator refines the artifact-prone views rendered by radiance fields, greatly enhancing the clarity and realism of the synthesized novel views. ViewExtrapolator is a generic novel view extrapolator that can work with different types of 3D rendering such as views rendered from point clouds when only a single view or monocular video is available. Additionally, ViewExtrapolator requires no fine-tuning of SVD, making it both data-efficient and computation-efficient. Extensive experiments demonstrate the superiority of ViewExtrapolator in novel view extrapolation. Project page: <a href="https://kunhao-liu.github.io/ViewExtrapolator/" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item264'>[264]</a> <a href ="/abs/2411.14213" title="Abstract" id="2411.14213"> arXiv:2411.14213 </a> [<a href="/pdf/2411.14213" title="Download PDF" id="pdf-2411.14213" aria-labelledby="pdf-2411.14213">pdf</a>, <a href="https://arxiv.org/html/2411.14213v1" title="View HTML" id="html-2411.14213" aria-labelledby="html-2411.14213" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14213" title="Other formats" id="oth-2411.14213" aria-labelledby="oth-2411.14213">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Generative Outpainting To Enhance the Memorability of Short-Form Videos </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Byju,+A">Alan Byju</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ladwa,+A+S">Aman Sudhindra Ladwa</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sweeney,+L">Lorin Sweeney</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Smeaton,+A+F">Alan F. Smeaton</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> With the expanding use of the short-form video format in advertising, social media, entertainment, education and more, there is a need for such media to both captivate and be remembered. Video memorability indicates to us how likely a video is to be remembered by a viewer who has no emotional or personal connection with its content. This paper presents the results of using generative outpainting to expand the screen size of a short-form video with a view to improving its memorability. Advances in machine learning and deep learning are compared and leveraged to understand how extending the borders of video screensizes can affect their memorability to viewers. 
Using quantitative evaluation we determine the best-performing model for outpainting and the impact of outpainting based on image saliency on video memorability scores </p> </div> </dd> <dt> <a name='item265'>[265]</a> <a href ="/abs/2411.14214" title="Abstract" id="2411.14214"> arXiv:2411.14214 </a> [<a href="/pdf/2411.14214" title="Download PDF" id="pdf-2411.14214" aria-labelledby="pdf-2411.14214">pdf</a>, <a href="https://arxiv.org/html/2411.14214v1" title="View HTML" id="html-2411.14214" aria-labelledby="html-2411.14214" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14214" title="Other formats" id="oth-2411.14214" aria-labelledby="oth-2411.14214">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Physics-Informed LLM-Agent for Automated Modulation Design in Power Electronics Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+J">Junhua Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+F">Fanfan Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xinze Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lim,+K+H">Kwan Hui Lim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+S">Shuai Zhao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Emerging Technologies (cs.ET) </div> <p class='mathjax'> LLM-based autonomous agents have demonstrated outstanding performance in solving complex industrial tasks. However, in the pursuit of carbon neutrality and high-performance renewable energy systems, existing AI-assisted design automation faces significant limitations in explainability, scalability, and usability. 
To address these challenges, we propose LP-COMDA, an LLM-based, physics-informed autonomous agent that automates the modulation design of power converters in Power Electronics Systems with minimal human supervision. Unlike traditional AI-assisted approaches, LP-COMDA contains an LLM-based planner that gathers and validates design specifications through a user-friendly chat interface. The planner then coordinates with physics-informed design and optimization tools to iteratively generate and refine modulation designs autonomously. Through the chat interface, LP-COMDA provides an explainable design process, presenting explanations and charts. Experiments show that LP-COMDA outperforms all baseline methods, achieving a 63.2% reduction in error compared to the second-best benchmark method in terms of standard mean absolute error. Furthermore, empirical studies with 20 experts conclude that design time with LP-COMDA is over 33 times faster than conventional methods, showing its significant improvement on design efficiency over the current processes. 
</p> </div> </dd> <dt> <a name='item266'>[266]</a> <a href ="/abs/2411.14215" title="Abstract" id="2411.14215"> arXiv:2411.14215 </a> [<a href="/pdf/2411.14215" title="Download PDF" id="pdf-2411.14215" aria-labelledby="pdf-2411.14215">pdf</a>, <a href="https://arxiv.org/html/2411.14215v1" title="View HTML" id="html-2411.14215" aria-labelledby="html-2411.14215" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14215" title="Other formats" id="oth-2411.14215" aria-labelledby="oth-2411.14215">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Evaluating the Robustness of Analogical Reasoning in Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lewis,+M">Martha Lewis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mitchell,+M">Melanie Mitchell</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 31 pages, 13 figures. arXiv admin note: text overlap with <a href="https://arxiv.org/abs/2402.08955" data-arxiv-id="2402.08955" class="link-https">arXiv:2402.08955</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> LLMs have performed well on several reasoning benchmarks, including ones that test analogical reasoning abilities. However, there is debate on the extent to which they are performing general abstract reasoning versus employing non-robust processes, e.g., that overly rely on similarity to pre-training data. Here we investigate the robustness of analogy-making abilities previously claimed for LLMs on three of four domains studied by Webb, Holyoak, and Lu (2023): letter-string analogies, digit matrices, and story analogies. 
For each domain we test humans and GPT models on robustness to variants of the original analogy problems that test the same abstract reasoning abilities but are likely dissimilar from tasks in the pre-training data. The performance of a system that uses robust abstract reasoning should not decline substantially on these variants. <br>On simple letter-string analogies, we find that while the performance of humans remains high for two types of variants we tested, the GPT models' performance declines sharply. This pattern is less pronounced as the complexity of these problems is increased, as both humans and GPT models perform poorly on both the original and variant problems requiring more complex analogies. On digit-matrix problems, we find a similar pattern but only on one out of the two types of variants we tested. On story-based analogy problems, we find that, unlike humans, the performance of GPT models is susceptible to answer-order effects, and that GPT models also may be more sensitive than humans to paraphrasing. <br>This work provides evidence that LLMs often lack the robustness of zero-shot human analogy-making, exhibiting brittleness on most of the variations we tested. More generally, this work points to the importance of carefully evaluating AI systems not only for accuracy but also robustness when testing their cognitive capabilities. 
</p> </div> </dd> <dt> <a name='item267'>[267]</a> <a href ="/abs/2411.14219" title="Abstract" id="2411.14219"> arXiv:2411.14219 </a> [<a href="/pdf/2411.14219" title="Download PDF" id="pdf-2411.14219" aria-labelledby="pdf-2411.14219">pdf</a>, <a href="/format/2411.14219" title="Other formats" id="oth-2411.14219" aria-labelledby="oth-2411.14219">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Context-Rich Automated Biodiversity Assessments: Deriving AI-Powered Insights from Camera Trap Data </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Fergus,+P">Paul Fergus</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chalmers,+C">Carl Chalmers</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Matthews,+N">Naomi Matthews</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nixon,+S">Stuart Nixon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Burger,+A">Andre Burger</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hartley,+O">Oliver Hartley</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sutherland,+C">Chris Sutherland</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lambin,+X">Xavier Lambin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Longmore,+S">Steven Longmore</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wich,+S">Serge Wich</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 32 Pages, 22 images </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Camera traps offer enormous new opportunities in ecological studies, but current automated image analysis methods often lack the contextual richness needed 
to support impactful conservation outcomes. Here we present an integrated approach that combines deep learning-based vision and language models to improve ecological reporting using data from camera traps. We introduce a two-stage system: YOLOv10-X to localise and classify species (mammals and birds) within images, and a Phi-3.5-vision-instruct model to read YOLOv10-X binding box labels to identify species, overcoming its limitation with hard to classify objects in images. Additionally, Phi-3.5 detects broader variables, such as vegetation type, and time of day, providing rich ecological and environmental context to YOLO's species detection output. When combined, this output is processed by the model's natural language system to answer complex queries, and retrieval-augmented generation (RAG) is employed to enrich responses with external information, like species weight and IUCN status (information that cannot be obtained through direct visual analysis). This information is used to automatically generate structured reports, providing biodiversity stakeholders with deeper insights into, for example, species abundance, distribution, animal behaviour, and habitat selection. Our approach delivers contextually rich narratives that aid in wildlife management decisions. By providing contextually rich insights, our approach not only reduces manual effort but also supports timely decision-making in conservation, potentially shifting efforts from reactive to proactive management. 
</p> </div> </dd> <dt> <a name='item268'>[268]</a> <a href ="/abs/2411.14222" title="Abstract" id="2411.14222"> arXiv:2411.14222 </a> [<a href="/pdf/2411.14222" title="Download PDF" id="pdf-2411.14222" aria-labelledby="pdf-2411.14222">pdf</a>, <a href="https://arxiv.org/html/2411.14222v1" title="View HTML" id="html-2411.14222" aria-labelledby="html-2411.14222" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14222" title="Other formats" id="oth-2411.14222" aria-labelledby="oth-2411.14222">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Generative AI-enabled Digital Twins for 6G-enhanced Smart Cities </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Duran,+K">Kubra Duran</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cakir,+L+V">Lal Verda Cakir</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ozdem,+M">Mehmet Ozdem</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gursu,+K">Kerem Gursu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Canberk,+B">Berk Canberk</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Networking and Internet Architecture (cs.NI)</span> </div> <p class='mathjax'> 6G networks are envisioned to enable a wide range of applications, such as autonomous vehicles and smart cities. However, this rapid expansion of network topologies makes the management of 6G wireless networks more complex and leads to performance degradation. Even though state-of-the-art applications on network services are providing promising results, they also risk disrupting the network's performance. To overcome this, the services have to leverage what-if implementations covering a variety of scenarios. At this point, traditional simulations fall short of encompassing the dynamism and complexity of a physical network. 
To overcome these challenges, we propose the Generative AI-based Digital Twins. For this, we derive an optimization formula to differentiate different network scenarios by considering the specific key performance indicators (KPIs) for wireless networks. Then, we feed this formula to the generative AI with the historical twins and real-time twins to start generating the desired topologies. To evaluate the performance, we implement network and smart-city-oriented services, namely massive connectivity, tiny instant communication, right-time synchronization, and truck path routes. The simulation results reveal that our approach can achieve 38% more stable network throughput in high device density scenarios. Furthermore, the generated scenario accuracy is able to reach up to 98% level, surpassing the baselines. </p> </div> </dd> <dt> <a name='item269'>[269]</a> <a href ="/abs/2411.14224" title="Abstract" id="2411.14224"> arXiv:2411.14224 </a> [<a href="/pdf/2411.14224" title="Download PDF" id="pdf-2411.14224" aria-labelledby="pdf-2411.14224">pdf</a>, <a href="https://arxiv.org/html/2411.14224v1" title="View HTML" id="html-2411.14224" aria-labelledby="html-2411.14224" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14224" title="Other formats" id="oth-2411.14224" aria-labelledby="oth-2411.14224">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Thermodynamic Algorithms for Quadratic Programming </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bartosik,+P">Patryk-Lipka Bartosik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Donatella,+K">Kaelan Donatella</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Aifer,+M">Maxwell Aifer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Melanson,+D">Denis Melanson</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Perarnau-Llobet,+M">Marti Perarnau-Llobet</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Brunner,+N">Nicolas Brunner</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Coles,+P+J">Patrick J. Coles</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Emerging Technologies (cs.ET)</span>; Statistical Mechanics (cond-mat.stat-mech); Optimization and Control (math.OC) </div> <p class='mathjax'> Thermodynamic computing has emerged as a promising paradigm for accelerating computation by harnessing the thermalization properties of physical systems. This work introduces a novel approach to solving quadratic programming problems using thermodynamic hardware. By incorporating a thermodynamic subroutine for solving linear systems into the interior-point method, we present a hybrid digital-analog algorithm that outperforms traditional digital algorithms in terms of speed. Notably, we achieve a polynomial asymptotic speedup compared to conventional digital approaches. Additionally, we simulate the algorithm for a support vector machine and predict substantial practical speedups with only minimal degradation in solution quality. Finally, we detail how our method can be applied to portfolio optimization and the simulation of nonlinear resistive networks. 
</p> </div> </dd> <dt> <a name='item270'>[270]</a> <a href ="/abs/2411.14226" title="Abstract" id="2411.14226"> arXiv:2411.14226 </a> [<a href="/pdf/2411.14226" title="Download PDF" id="pdf-2411.14226" aria-labelledby="pdf-2411.14226">pdf</a>, <a href="https://arxiv.org/html/2411.14226v1" title="View HTML" id="html-2411.14226" aria-labelledby="html-2411.14226" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14226" title="Other formats" id="oth-2411.14226" aria-labelledby="oth-2411.14226">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Regularization and passivity-preserving model reduction of quasilinear magneto-quasistatic coupled problems </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Kerler-Back,+J">Johanna Kerler-Back</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Reis,+T">Timo Reis</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Stykel,+T">Tatjana Stykel</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 33 pages, 5 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span> </div> <p class='mathjax'> We consider the quasilinear magneto-quasistatic field equations that arise in the simulation of low-frequency electromagnetic devices coupled to electrical circuits. Spatial discretization of these equations on 3D domains using the finite element method results in a singular system of differential-algebraic equations (DAEs). First, we analyze the structural properties of this system and present a novel regularization approach based on projecting out the singular state components. Next, we explore the passivity of the variational magneto-quasistatic problem and its discretization by defining suitable storage functions. 
For model reduction of the magneto-quasistatic system, we employ the proper orthogonal decomposition (POD) technique combined with the discrete empirical interpolation method (DEIM), to facilitate efficient evaluation of the system's nonlinearities. Our model reduction approach involves the transformation of the regularized DAE into a system of ordinary differential equations, leveraging a special block structure inherent in the problem, followed by applying standard model reduction techniques to the transformed system. We prove that the POD-reduced model preserves passivity, and for the POD-DEIM-reduced model, we propose to enforce passivity by perturbing the output in a way that accounts for DEIM errors. Numerical experiments illustrate the effectiveness of the presented model reduction methods and the passivity enforcement technique. </p> </div> </dd> <dt> <a name='item271'>[271]</a> <a href ="/abs/2411.14228" title="Abstract" id="2411.14228"> arXiv:2411.14228 </a> [<a href="/pdf/2411.14228" title="Download PDF" id="pdf-2411.14228" aria-labelledby="pdf-2411.14228">pdf</a>, <a href="https://arxiv.org/html/2411.14228v1" title="View HTML" id="html-2411.14228" aria-labelledby="html-2411.14228" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14228" title="Other formats" id="oth-2411.14228" aria-labelledby="oth-2411.14228">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FocusLLaVA: A Coarse-to-Fine Approach for Efficient and Effective Visual Token Compression </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+Y">Yuke Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+C">Chi Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+S">Shuang Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+B">Bo Zheng</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+S">Sheng Guo</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Recent advances on Multi-modal Large Language Models have demonstrated that high-resolution image input is crucial for model capabilities, especially for fine-grained tasks. However, high-resolution images lead to a quadratic increase in the number of visual tokens input into LLMs, resulting in significant computational costs. Current work develops visual token compression methods to achieve efficiency improvements, often at the expense of performance. We argue that removing visual redundancy can simultaneously improve both efficiency and performance. We build a coarse-to-fine visual token compression method, with a vision-guided sampler for compressing redundant regions with low information density, and a text-guided sampler for selecting visual tokens that are strongly correlated with the user instructions. With these two modules, the proposed FocusLLaVA achieves improvements in both efficiency and performance. We validate the effectiveness of our approach on a wide range of evaluation datasets. 
</p> </div> </dd> <dt> <a name='item272'>[272]</a> <a href ="/abs/2411.14233" title="Abstract" id="2411.14233"> arXiv:2411.14233 </a> [<a href="/pdf/2411.14233" title="Download PDF" id="pdf-2411.14233" aria-labelledby="pdf-2411.14233">pdf</a>, <a href="https://arxiv.org/html/2411.14233v1" title="View HTML" id="html-2411.14233" aria-labelledby="html-2411.14233" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14233" title="Other formats" id="oth-2411.14233" aria-labelledby="oth-2411.14233">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A qualitative analysis of remote patient monitoring: how a paradox mindset can support balancing emotional tensions in the design of healthcare technologies </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jonassen,+Z">Zoe Jonassen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lawrence,+K">Katharine Lawrence</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wiesenfeld,+B+M">Batia Mishan Wiesenfeld</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Feuerriegel,+S">Stefan Feuerriegel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mann,+D">Devin Mann</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at CSCW 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span> </div> <p class='mathjax'> Remote patient monitoring (RPM) is the use of digital technologies to improve patient care at a distance. However, current RPM solutions are often biased toward tech-savvy patients. To foster health equity, researchers have studied how to address the socio-economic and cognitive needs of diverse patient groups, but their emotional needs have remained largely neglected. 
We perform the first qualitative study to explore the emotional needs of diverse patients around RPM. Specifically, we conduct a thematic analysis of 18 interviews and 4 focus groups at a large US healthcare organization. We identify emotional needs that lead to four emotional tensions within and across stakeholder groups when applying an equity focus to the design and implementation of RPM technologies. The four emotional tensions are making diverse patients feel: (i) heard vs. exploited; (ii) seen vs. deprioritized for efficiency; (iii) empowered vs. anxious; and (iv) cared for vs. detached from care. To manage these emotional tensions across stakeholders, we develop design recommendations informed by a paradox mindset (i.e., "both-and" rather than "and-or" strategies). </p> </div> </dd> <dt> <a name='item273'>[273]</a> <a href ="/abs/2411.14242" title="Abstract" id="2411.14242"> arXiv:2411.14242 </a> [<a href="/pdf/2411.14242" title="Download PDF" id="pdf-2411.14242" aria-labelledby="pdf-2411.14242">pdf</a>, <a href="https://arxiv.org/html/2411.14242v1" title="View HTML" id="html-2411.14242" aria-labelledby="html-2411.14242" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14242" title="Other formats" id="oth-2411.14242" aria-labelledby="oth-2411.14242">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Approximate Constrained Lumping of Chemical Reaction Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Leguizamon-Robayo,+A">Alexander Leguizamon-Robayo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jim%C3%A9nez-Pastor,+A">Antonio Jiménez-Pastor</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tribastone,+M">Micro Tribastone</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tschaikowski,+M">Max Tschaikowski</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Vandin,+A">Andrea Vandin</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computational Engineering, Finance, and Science (cs.CE)</span>; Molecular Networks (q-bio.MN); Quantitative Methods (q-bio.QM) </div> <p class='mathjax'> Gaining insights from realistic dynamical models of biochemical systems can be challenging given their large number of state variables. Model reduction techniques can mitigate this by decreasing complexity by mapping the model onto a lower-dimensional state space. Exact constrained lumping identifies reductions as linear combinations of the original state variables in systems of nonlinear ordinary differential equations, preserving specific user-defined output variables without error. However, exact reductions can be too stringent in practice, as model parameters are often uncertain or imprecise -- a particularly relevant problem for biochemical systems. We propose approximate constrained lumping. It allows for a relaxation of exactness within a given tolerance parameter $\varepsilon$, while still working in polynomial time. We prove that the accuracy, i.e., the difference between the output variables in the original and reduced model, is in the order of $\varepsilon$. Furthermore, we provide a heuristic algorithm to find the smallest $\varepsilon$ for a given maximum allowable size of the lumped system. Our method is applied to several models from the literature, resulting in coarser aggregations than exact lumping while still capturing the dynamics of the original system accurately. 
</p> </div> </dd> <dt> <a name='item274'>[274]</a> <a href ="/abs/2411.14243" title="Abstract" id="2411.14243"> arXiv:2411.14243 </a> [<a href="/pdf/2411.14243" title="Download PDF" id="pdf-2411.14243" aria-labelledby="pdf-2411.14243">pdf</a>, <a href="https://arxiv.org/html/2411.14243v1" title="View HTML" id="html-2411.14243" aria-labelledby="html-2411.14243" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14243" title="Other formats" id="oth-2411.14243" aria-labelledby="oth-2411.14243">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> AnywhereDoor: Multi-Target Backdoor Attacks on Object Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+J">Jialin Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shan,+J">Junjie Shan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Z">Ziqi Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chow,+K">Ka-Ho Chow</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> As object detection becomes integral to many safety-critical applications, understanding its vulnerabilities is essential. Backdoor attacks, in particular, pose a significant threat by implanting hidden backdoor in a victim model, which adversaries can later exploit to trigger malicious behaviors during inference. However, current backdoor techniques are limited to static scenarios where attackers must define a malicious objective before training, locking the attack into a predetermined action without inference-time adaptability. 
Given the expressive output space in object detection, including object existence detection, bounding box estimation, and object classification, the feasibility of implanting a backdoor that provides inference-time control with a high degree of freedom remains unexplored. This paper introduces AnywhereDoor, a flexible backdoor attack tailored for object detection. Once implanted, AnywhereDoor enables adversaries to specify different attack types (object vanishing, fabrication, or misclassification) and configurations (untargeted or targeted with specific classes) to dynamically control detection behavior. This flexibility is achieved through three key innovations: (i) objective disentanglement to support a broader range of attack combinations well beyond what existing methods allow; (ii) trigger mosaicking to ensure backdoor activations are robust, even against those object detectors that extract localized regions from the input image for recognition; and (iii) strategic batching to address object-level data imbalances that otherwise hinder a balanced manipulation. Extensive experiments demonstrate that AnywhereDoor provides attackers with a high degree of control, achieving an attack success rate improvement of nearly 80% compared to adaptations of existing methods for such flexible control. 
</p> </div> </dd> <dt> <a name='item275'>[275]</a> <a href ="/abs/2411.14245" title="Abstract" id="2411.14245"> arXiv:2411.14245 </a> [<a href="/pdf/2411.14245" title="Download PDF" id="pdf-2411.14245" aria-labelledby="pdf-2411.14245">pdf</a>, <a href="https://arxiv.org/html/2411.14245v1" title="View HTML" id="html-2411.14245" aria-labelledby="html-2411.14245" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14245" title="Other formats" id="oth-2411.14245" aria-labelledby="oth-2411.14245">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Pulsar Consensus </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Afach,+S">Samer Afach</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Marsh,+B">Benjamin Marsh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rubboli,+E">Enrico Rubboli</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Mintlayer consensus overview </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span> </div> <p class='mathjax'> In this paper, we informally introduce the Pulsar proof of stake consensus paper and discuss the relevant design decisions and considerations. The Pulsar protocol we propose is designed to facilitate the creation of a proof of stake sidechain for a proof of work blockchain. We present an overview of a novel composable density-based chain selection rule for proof of stake systems which can be seen as a superset of some standard existing longest chain rules for proof of stake protocols. We discuss the Pulsar protocol in comparison to existing proof of stake protocols and define its benefits over existing designs while defining the limitations of the work. Pulsar is currently implemented in the Mintlayer proof of stake Bitcoin sidechain. 
</p> </div> </dd> <dt> <a name='item276'>[276]</a> <a href ="/abs/2411.14246" title="Abstract" id="2411.14246"> arXiv:2411.14246 </a> [<a href="/pdf/2411.14246" title="Download PDF" id="pdf-2411.14246" aria-labelledby="pdf-2411.14246">pdf</a>, <a href="https://arxiv.org/html/2411.14246v1" title="View HTML" id="html-2411.14246" aria-labelledby="html-2411.14246" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14246" title="Other formats" id="oth-2411.14246" aria-labelledby="oth-2411.14246">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Simulation-Aided Policy Tuning for Black-Box Robot Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=He,+S">Shiming He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=von+Rohr,+A">Alexander von Rohr</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Baumann,+D">Dominik Baumann</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiang,+J">Ji Xiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Trimpe,+S">Sebastian Trimpe</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Machine Learning (cs.LG); Systems and Control (eess.SY) </div> <p class='mathjax'> How can robots learn and adapt to new tasks and situations with little data? Systematic exploration and simulation are crucial tools for efficient robot learning. We present a novel black-box policy search algorithm focused on data-efficient policy improvements. The algorithm learns directly on the robot and treats simulation as an additional information source to speed up the learning process. 
At the core of the algorithm, a probabilistic model learns the dependence of the policy parameters and the robot learning objective not only by performing experiments on the robot, but also by leveraging data from a simulator. This substantially reduces interaction time with the robot. Using this model, we can guarantee improvements with high probability for each policy update, thereby facilitating fast, goal-oriented learning. We evaluate our algorithm on simulated fine-tuning tasks and demonstrate the data-efficiency of the proposed dual-information source optimization algorithm. In a real robot learning experiment, we show fast and successful task learning on a robot manipulator with the aid of an imperfect simulator. </p> </div> </dd> <dt> <a name='item277'>[277]</a> <a href ="/abs/2411.14249" title="Abstract" id="2411.14249"> arXiv:2411.14249 </a> [<a href="/pdf/2411.14249" title="Download PDF" id="pdf-2411.14249" aria-labelledby="pdf-2411.14249">pdf</a>, <a href="https://arxiv.org/html/2411.14249v1" title="View HTML" id="html-2411.14249" aria-labelledby="html-2411.14249" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14249" title="Other formats" id="oth-2411.14249" aria-labelledby="oth-2411.14249">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards a Physics Engine to Simulate Robotic Laser Surgery: Finite Element Modeling of Thermal Laser-Tissue Interactions </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pacheco,+N+E">Nicholas E. Pacheco</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+K">Kang Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Reyes,+A+S">Ashley S. Reyes</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pacheco,+C+J">Christopher J. 
Pacheco</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Burstein,+L">Lucas Burstein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fichera,+L">Loris Fichera</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to the International Symposium on Medical Robotics 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> This paper presents a computational model, based on the Finite Element Method (FEM), that simulates the thermal response of laser-irradiated tissue. This model addresses a gap in the current ecosystem of surgical robot simulators, which generally lack support for lasers and other energy-based end effectors. In the proposed model, the thermal dynamics of the tissue are calculated as the solution to a heat conduction problem with appropriate boundary conditions. The FEM formulation allows the model to capture complex phenomena, such as convection, which is crucial for creating realistic simulations. The accuracy of the model was verified via benchtop laser-tissue interaction experiments using agar tissue phantoms and ex-vivo chicken muscle. The results revealed an average root-mean-square error (RMSE) of less than 2 degrees Celsius across most experimental conditions. 
</p> </div> </dd> <dt> <a name='item278'>[278]</a> <a href ="/abs/2411.14251" title="Abstract" id="2411.14251"> arXiv:2411.14251 </a> [<a href="/pdf/2411.14251" title="Download PDF" id="pdf-2411.14251" aria-labelledby="pdf-2411.14251">pdf</a>, <a href="/format/2411.14251" title="Other formats" id="oth-2411.14251" aria-labelledby="oth-2411.14251">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Natural Language Reinforcement Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Feng,+X">Xidong Feng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wan,+Z">Ziyu Wan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+H">Haotian Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+B">Bo Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+M">Mengyue Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Koushik,+G+A">Girish A. Koushik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+Z">Zhiyuan Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+Y">Ying Wen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+J">Jun Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Extension of <a href="https://arxiv.org/abs/2402.07157" data-arxiv-id="2402.07157" class="link-https">arXiv:2402.07157</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL) </div> <p class='mathjax'> Reinforcement Learning (RL) mathematically formulates decision-making with Markov Decision Process (MDP). With MDPs, researchers have achieved remarkable breakthroughs across various domains, including games, robotics, and language models. 
This paper seeks a new possibility, Natural Language Reinforcement Learning (NLRL), by extending traditional MDP to natural language-based representation space. Specifically, NLRL innovatively redefines RL principles, including task objectives, policy, value function, Bellman equation, and policy iteration, into their language counterparts. With recent advancements in large language models (LLMs), NLRL can be practically implemented to achieve RL-like policy and value improvement by either pure prompting or gradient-based training. Experiments over Maze, Breakthrough, and Tic-Tac-Toe games demonstrate the effectiveness, efficiency, and interpretability of the NLRL framework among diverse use cases. Our code will be released at <a href="https://github.com/waterhorse1/Natural-language-RL" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item279'>[279]</a> <a href ="/abs/2411.14252" title="Abstract" id="2411.14252"> arXiv:2411.14252 </a> [<a href="/pdf/2411.14252" title="Download PDF" id="pdf-2411.14252" aria-labelledby="pdf-2411.14252">pdf</a>, <a href="https://arxiv.org/html/2411.14252v1" title="View HTML" id="html-2411.14252" aria-labelledby="html-2411.14252" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14252" title="Other formats" id="oth-2411.14252" aria-labelledby="oth-2411.14252">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Intent-Aware Dialogue Generation and Multi-Task Contrastive Learning for Multi-Turn Intent Classification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+J">Junhua Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tan,+Y+K">Yong Keat Tan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+B">Bin Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lim,+K+H">Kwan Hui 
Lim</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Generating large-scale, domain-specific, multilingual multi-turn dialogue datasets remains a significant hurdle for training effective Multi-Turn Intent Classification models in chatbot systems. In this paper, we introduce Chain-of-Intent, a novel mechanism that combines Hidden Markov Models with Large Language Models (LLMs) to generate contextually aware, intent-driven conversations through self-play. By extracting domain-specific knowledge from e-commerce chat logs, we estimate conversation turns and intent transitions, which guide the generation of coherent dialogues. Leveraging LLMs to enhance emission probabilities, our approach produces natural and contextually consistent questions and answers. We also propose MINT-CL, a framework for multi-turn intent classification using multi-task contrastive learning, improving classification accuracy without the need for extensive annotated data. Evaluations show that our methods outperform baselines in dialogue quality and intent classification accuracy, especially in multilingual settings, while significantly reducing data generation efforts. Furthermore, we release MINT-E, a multilingual, intent-aware multi-turn e-commerce dialogue corpus to support future research in this area. 
</p> </div> </dd> <dt> <a name='item280'>[280]</a> <a href ="/abs/2411.14254" title="Abstract" id="2411.14254"> arXiv:2411.14254 </a> [<a href="/pdf/2411.14254" title="Download PDF" id="pdf-2411.14254" aria-labelledby="pdf-2411.14254">pdf</a>, <a href="https://arxiv.org/html/2411.14254v1" title="View HTML" id="html-2411.14254" aria-labelledby="html-2411.14254" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14254" title="Other formats" id="oth-2411.14254" aria-labelledby="oth-2411.14254">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> BERT-Based Approach for Automating Course Articulation Matrix Construction with Explainable AI </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shiferaw,+N+A">Natenaile Asmamaw Shiferaw</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Leandre,+S+H">Simpenzwe Honore Leandre</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sinha,+A">Aman Sinha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rout,+D">Dillip Rout</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 26 pages, 9 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computers and Society (cs.CY) </div> <p class='mathjax'> Course Outcome (CO) and Program Outcome (PO)/Program-Specific Outcome (PSO) alignment is a crucial task for ensuring curriculum coherence and assessing educational effectiveness. The construction of a Course Articulation Matrix (CAM), which quantifies the relationship between COs and POs/PSOs, typically involves assigning numerical values (0, 1, 2, 3) to represent the degree of alignment. 
In this study, we experiment with four models from the BERT family: BERT Base, DistilBERT, ALBERT, and RoBERTa, and use multiclass classification to assess the alignment between CO and PO/PSO pairs. We first evaluate traditional machine learning classifiers, such as Decision Tree, Random Forest, and XGBoost, and then apply transfer learning to evaluate the performance of the pretrained BERT models. To enhance model interpretability, we apply an Explainable AI technique, specifically Local Interpretable Model-agnostic Explanations (LIME), to provide transparency into the decision-making process. Our system achieves accuracy, precision, recall, and F1-score values of 98.66%, 98.67%, 98.66%, and 98.66%, respectively. This work demonstrates the potential of utilizing transfer learning with BERT-based models for the automated generation of CAMs, offering high performance and interpretability in educational outcome assessment. </p> </div> </dd> <dt> <a name='item281'>[281]</a> <a href ="/abs/2411.14256" title="Abstract" id="2411.14256"> arXiv:2411.14256 </a> [<a href="/pdf/2411.14256" title="Download PDF" id="pdf-2411.14256" aria-labelledby="pdf-2411.14256">pdf</a>, <a href="https://arxiv.org/html/2411.14256v1" title="View HTML" id="html-2411.14256" aria-labelledby="html-2411.14256" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14256" title="Other formats" id="oth-2411.14256" aria-labelledby="oth-2411.14256">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Generalizing End-To-End Autonomous Driving In Real-World Environments Using Zero-Shot LLMs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Dong,+Z">Zeyu Dong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+Y">Yimin Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yansong Li</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Mahon,+K">Kevin Mahon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+Y">Yu Sun</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Traditional autonomous driving methods adopt a modular design, decomposing tasks into sub-tasks. In contrast, end-to-end autonomous driving directly outputs actions from raw sensor data, avoiding error accumulation. However, training an end-to-end model requires a comprehensive dataset; otherwise, the model exhibits poor generalization capabilities. Recently, large language models (LLMs) have been applied to enhance the generalization capabilities of end-to-end driving models. Most studies explore LLMs in an open-loop manner, where the output actions are compared to those of experts without direct feedback from the real world, while others examine closed-loop results only in simulations. This paper proposes an efficient architecture that integrates multimodal LLMs into end-to-end driving models operating in closed-loop settings in real-world environments. In our architecture, the LLM periodically processes raw sensor data to generate high-level driving instructions, effectively guiding the end-to-end model, even at a slower rate than the raw sensor data. This architecture relaxes the trade-off between the latency and inference quality of the LLM. It also allows us to choose from a wide variety of LLMs to improve high-level driving instructions and minimize fine-tuning costs. Consequently, our architecture reduces data collection requirements because the LLMs do not directly output actions; we only need to train a simple imitation learning model to output actions. 
In our experiments, the training data for the end-to-end model in a real-world environment consists of only simple obstacle configurations with one traffic cone, while the test environment is more complex and contains multiple obstacles placed in various positions. Experiments show that the proposed architecture enhances the generalization capabilities of the end-to-end model even without fine-tuning the LLM. </p> </div> </dd> <dt> <a name='item282'>[282]</a> <a href ="/abs/2411.14257" title="Abstract" id="2411.14257"> arXiv:2411.14257 </a> [<a href="/pdf/2411.14257" title="Download PDF" id="pdf-2411.14257" aria-labelledby="pdf-2411.14257">pdf</a>, <a href="https://arxiv.org/html/2411.14257v1" title="View HTML" id="html-2411.14257" aria-labelledby="html-2411.14257" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14257" title="Other formats" id="oth-2411.14257" aria-labelledby="oth-2411.14257">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Do I Know This Entity? Knowledge Awareness and Hallucinations in Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ferrando,+J">Javier Ferrando</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Obeso,+O">Oscar Obeso</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rajamanoharan,+S">Senthooran Rajamanoharan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nanda,+N">Neel Nanda</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Hallucinations in large language models are a widespread problem, yet the mechanisms behind whether models will hallucinate are poorly understood, limiting our ability to solve this problem. 
Using sparse autoencoders as an interpretability tool, we discover that a key part of these mechanisms is entity recognition, where the model detects if an entity is one it can recall facts about. Sparse autoencoders uncover meaningful directions in the representation space; these detect whether the model recognizes an entity, e.g. detecting it doesn't know about an athlete or a movie. This suggests that models can have self-knowledge: internal representations about their own capabilities. These directions are causally relevant: capable of steering the model to refuse to answer questions about known entities, or to hallucinate attributes of unknown entities when it would otherwise refuse. We demonstrate that despite the sparse autoencoders being trained on the base model, these directions have a causal effect on the chat model's refusal behavior, suggesting that chat finetuning has repurposed this existing mechanism. Furthermore, we provide an initial exploration into the mechanistic role of these directions in the model, finding that they disrupt the attention of downstream heads that typically move entity attributes to the final token. 
</p> </div> </dd> <dt> <a name='item283'>[283]</a> <a href ="/abs/2411.14258" title="Abstract" id="2411.14258"> arXiv:2411.14258 </a> [<a href="/pdf/2411.14258" title="Download PDF" id="pdf-2411.14258" aria-labelledby="pdf-2411.14258">pdf</a>, <a href="https://arxiv.org/html/2411.14258v1" title="View HTML" id="html-2411.14258" aria-labelledby="html-2411.14258" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14258" title="Other formats" id="oth-2411.14258" aria-labelledby="oth-2411.14258">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Knowledge Graphs, Large Language Models, and Hallucinations: An NLP Perspective </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lavrinovics,+E">Ernests Lavrinovics</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Biswas,+R">Russa Biswas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bjerva,+J">Johannes Bjerva</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hose,+K">Katja Hose</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7 pages, 2 Figures, 1 Table </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Large Language Models (LLMs) have revolutionized Natural Language Processing (NLP) based applications including automated text generation, question answering, chatbots, and others. However, they face a significant challenge: hallucinations, where models produce plausible-sounding but factually incorrect responses. This undermines trust and limits the applicability of LLMs in different domains. Knowledge Graphs (KGs), on the other hand, provide a structured collection of interconnected facts represented as entities (nodes) and their relationships (edges). 
In recent research, KGs have been leveraged to provide context that can fill gaps in an LLM's understanding of certain topics, offering a promising approach to mitigate hallucinations in LLMs, enhancing their reliability and accuracy while benefiting from their wide applicability. Nonetheless, it is still a very active area of research with various unresolved open problems. In this paper, we discuss these open challenges covering state-of-the-art datasets and benchmarks as well as methods for knowledge integration and evaluating hallucinations. In our discussion, we consider the current use of KGs in LLM systems and identify future directions within each of these challenges. </p> </div> </dd> <dt> <a name='item284'>[284]</a> <a href ="/abs/2411.14262" title="Abstract" id="2411.14262"> arXiv:2411.14262 </a> [<a href="/pdf/2411.14262" title="Download PDF" id="pdf-2411.14262" aria-labelledby="pdf-2411.14262">pdf</a>, <a href="https://arxiv.org/html/2411.14262v1" title="View HTML" id="html-2411.14262" aria-labelledby="html-2411.14262" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14262" title="Other formats" id="oth-2411.14262" aria-labelledby="oth-2411.14262">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Accelerating Construction of Non-Intrusive Nonlinear Structural Dynamics Reduced Order Models through Hyperreduction </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Saccani,+A">Alexander Saccani</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Tiso,+P">Paolo Tiso</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span>; Computational Physics (physics.comp-ph) </div> <p class='mathjax'> We present a novel technique to significantly reduce the offline cost associated to non-intrusive nonlinear tensors identification in reduced 
order models (ROMs) of geometrically nonlinear, finite elements (FE)-discretized structural dynamics problems. The ROM is obtained by Galerkin-projection of the governing equations on a reduction basis (RB) of Vibration Modes (VMs) and Static Modal Derivatives (SMDs), resulting in reduced internal forces that are cubic polynomial in the reduced coordinates. The unknown coefficients of the nonlinear tensors associated with this polynomial representation are identified using a modified version of Enhanced Enforced Displacement (EED) method which leverages Energy Conserving Sampling and Weighting (ECSW) as a hyperreduction technique for efficiency improvement. Specifically, ECSW is employed to accelerate the evaluations of the nonlinear reduced tangent stiffness matrix that are required within EED. Simulation-free training sets of forces for ECSW are obtained from displacements corresponding to quasi-random samples of a nonlinear second order static displacement manifold. The proposed approach is beneficial for the investigation of the dynamic response of structures subjected to acoustic loading, where multiple VMs must be added in the RB, resulting in expensive nonlinear tensor identification. Superiority of the novel method over standard EED is demonstrated on FE models of a shallow curved clamped panel and of a nine-bay aeronautical reinforced panel, modelled using the commercial finite element program Abaqus. 
</p> </div> </dd> <dt> <a name='item285'>[285]</a> <a href ="/abs/2411.14263" title="Abstract" id="2411.14263"> arXiv:2411.14263 </a> [<a href="/pdf/2411.14263" title="Download PDF" id="pdf-2411.14263" aria-labelledby="pdf-2411.14263">pdf</a>, <a href="https://arxiv.org/html/2411.14263v1" title="View HTML" id="html-2411.14263" aria-labelledby="html-2411.14263" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14263" title="Other formats" id="oth-2411.14263" aria-labelledby="oth-2411.14263">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Generating Realistic Adversarial Examples for Business Processes using Variational Autoencoders </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Stevens,+A">Alexander Stevens</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peeperkorn,+J">Jari Peeperkorn</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=De+Smedt,+J">Johannes De Smedt</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=De+Weerdt,+J">Jochen De Weerdt</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> In predictive process monitoring, predictive models are vulnerable to adversarial attacks, where input perturbations can lead to incorrect predictions. Unlike in computer vision, where these perturbations are designed to be imperceptible to the human eye, the generation of adversarial examples in predictive process monitoring poses unique challenges. Minor changes to the activity sequences can create improbable or even impossible scenarios to occur due to underlying constraints such as regulatory rules or process constraints. 
To address this, we focus on generating realistic adversarial examples tailored to the business process context, in contrast to the imperceptible, pixel-level changes commonly seen in computer vision adversarial attacks. This paper introduces two novel latent space attacks, which generate adversaries by adding noise to the latent space representation of the input data, rather than directly modifying the input attributes. These latent space methods are domain-agnostic and do not rely on process-specific knowledge, as we restrict the generation of adversarial examples to the learned class-specific data distributions by directly perturbing the latent space representation of the business process executions. We evaluate these two latent space methods with six other adversarial attacking methods on eleven real-life event logs and four predictive models. The first three attacking methods directly permute the activities of the historically observed business process executions. The fourth method constrains the adversarial examples to lie within the same data distribution as the original instances, by projecting the adversarial examples to the original data distribution. 
</p> </div> </dd> <dt> <a name='item286'>[286]</a> <a href ="/abs/2411.14264" title="Abstract" id="2411.14264"> arXiv:2411.14264 </a> [<a href="/pdf/2411.14264" title="Download PDF" id="pdf-2411.14264" aria-labelledby="pdf-2411.14264">pdf</a>, <a href="https://arxiv.org/html/2411.14264v1" title="View HTML" id="html-2411.14264" aria-labelledby="html-2411.14264" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14264" title="Other formats" id="oth-2411.14264" aria-labelledby="oth-2411.14264">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Explainable Multi-Agent Reinforcement Learning for Extended Reality Codec Adaptation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Iturria-Rivera,+P+E">Pedro Enrique Iturria-Rivera</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gaigalas,+R">Raimundas Gaigalas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Elsayed,+M">Medhat Elsayed</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bavand,+M">Majid Bavand</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ozcan,+Y">Yigit Ozcan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Erol-Kantarci,+M">Melike Erol-Kantarci</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 15 pages, 14 figures, Submitted to TCCN </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Networking and Internet Architecture (cs.NI)</span> </div> <p class='mathjax'> Extended Reality (XR) services are set to transform applications over 5th and 6th generation wireless networks, delivering immersive experiences. Concurrently, Artificial Intelligence (AI) advancements have expanded their role in wireless networks; however, trust and transparency in AI remain to be strengthened. 
Thus, providing explanations for AI-enabled systems can enhance trust. We introduce Value Function Factorization (VFF)-based Explainable (X) Multi-Agent Reinforcement Learning (MARL) algorithms, explaining reward design in XR codec adaptation through reward decomposition. We contribute four enhancements to XMARL algorithms. Firstly, we detail architectural modifications to enable reward decomposition in VFF-based MARL algorithms: Value Decomposition Networks (VDN), Mixture of Q-Values (QMIX), and Q-Transformation (Q-TRAN). Secondly, inspired by multi-task learning, we reduce the overhead of vanilla XMARL algorithms. Thirdly, we propose a new explainability metric, Reward Difference Fluctuation Explanation (RDFX), suitable for problems with adjustable parameters. Lastly, we propose adaptive XMARL, leveraging network gradients and reward decomposition for improved action selection. Simulation results indicate that, in XR codec adaptation, the Packet Delivery Ratio reward is the primary contributor to optimal performance compared to the initial composite reward, which included delay and Data Rate Ratio components. Modifications to VFF-based XMARL algorithms, incorporating multi-headed structures and adaptive loss functions, enable the best-performing algorithm, Multi-Headed Adaptive (MHA)-QMIX, to achieve significant average gains over the Adjust Packet Size baseline up to 10.7%, 41.4%, 33.3%, and 67.9% in XR index, jitter, delay, and Packet Loss Ratio (PLR), respectively. 
</p> </div> </dd> <dt> <a name='item287'>[287]</a> <a href ="/abs/2411.14267" title="Abstract" id="2411.14267"> arXiv:2411.14267 </a> [<a href="/pdf/2411.14267" title="Download PDF" id="pdf-2411.14267" aria-labelledby="pdf-2411.14267">pdf</a>, <a href="/format/2411.14267" title="Other formats" id="oth-2411.14267" aria-labelledby="oth-2411.14267">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Truly Supercritical Trade-offs for Resolution, Cutting Planes, Monotone Circuits, and Weisfeiler-Leman </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=de+Rezende,+S+F">Susanna F. de Rezende</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fleming,+N">Noah Fleming</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Janett,+D+A">Duri Andrea Janett</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nordstr%C3%B6m,+J">Jakob Nordström</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pang,+S">Shuo Pang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 47 pages, 7 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computational Complexity (cs.CC)</span>; Logic in Computer Science (cs.LO) </div> <p class='mathjax'> We exhibit supercritical trade-off for monotone circuits, showing that there are functions computable by small circuits for which any circuit must have depth super-linear or even super-polynomial in the number of variables, far exceeding the linear worst-case upper bound. We obtain similar trade-offs in proof complexity, where we establish the first size-depth trade-offs for cutting planes and resolution that are truly supercritical, i.e., in terms of formula size rather than number of variables, and we also show supercritical trade-offs between width and size for treelike resolution. 
Our results build on a new supercritical width-depth trade-off for resolution, obtained by refining and strengthening the compression scheme for the Cop-Robber game in [Grohe, Lichter, Neuen & Schweitzer 2023]. This yields robust supercritical trade-offs for dimension versus iteration number in the Weisfeiler-Leman algorithm, which also translate into trade-offs between number of variables and quantifier depth in first-order logic. Our other results follow from improved lifting theorems that might be of independent interest. </p> </div> </dd> <dt> <a name='item288'>[288]</a> <a href ="/abs/2411.14268" title="Abstract" id="2411.14268"> arXiv:2411.14268 </a> [<a href="/pdf/2411.14268" title="Download PDF" id="pdf-2411.14268" aria-labelledby="pdf-2411.14268">pdf</a>, <a href="https://arxiv.org/html/2411.14268v1" title="View HTML" id="html-2411.14268" aria-labelledby="html-2411.14268" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14268" title="Other formats" id="oth-2411.14268" aria-labelledby="oth-2411.14268">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Supercritical Tradeoffs for Monotone Circuits </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=G%C3%B6%C3%B6s,+M">Mika Göös</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Maystre,+G">Gilbert Maystre</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Risse,+K">Kilian Risse</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sokolov,+D">Dmitry Sokolov</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computational Complexity (cs.CC)</span> </div> <p class='mathjax'> We exhibit a monotone function computable by a monotone circuit of quasipolynomial size such that any monotone circuit of polynomial depth requires exponential size. 
This is the first size-depth tradeoff result for monotone circuits in the so-called supercritical regime. Our proof is based on an analogous result in proof complexity: We introduce a new family of unsatisfiable 3-CNF formulas (called bracket formulas) that admit resolution refutations of quasipolynomial size while any refutation of polynomial depth requires exponential size. </p> </div> </dd> <dt> <a name='item289'>[289]</a> <a href ="/abs/2411.14272" title="Abstract" id="2411.14272"> arXiv:2411.14272 </a> [<a href="/pdf/2411.14272" title="Download PDF" id="pdf-2411.14272" aria-labelledby="pdf-2411.14272">pdf</a>, <a href="https://arxiv.org/html/2411.14272v1" title="View HTML" id="html-2411.14272" aria-labelledby="html-2411.14272" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14272" title="Other formats" id="oth-2411.14272" aria-labelledby="oth-2411.14272">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Efficient Aspect-Based Summarization of Climate Change Reports with Small Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ghinassi,+I">Iacopo Ghinassi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Catalano,+L">Leonardo Catalano</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Colella,+T">Tommaso Colella</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Proceedings of the Third Workshop on NLP for Positive Impact (2024) 123-139 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> The use of Natural Language Processing (NLP) for helping decision-makers with Climate Change action has recently been highlighted as a use case aligning with a broader drive towards NLP technologies for social good. 
In this context, Aspect-Based Summarization (ABS) systems that extract and summarize relevant information are particularly useful as they provide stakeholders with a convenient way of finding relevant information in expert-curated reports. In this work, we release a new dataset for ABS of Climate Change reports and we employ different Large Language Models (LLMs) and so-called Small Language Models (SLMs) to tackle this problem in an unsupervised way. Considering the problem at hand, we also show how SLMs are not significantly worse for the problem while leading to reduced carbon footprint; we do so by applying for the first time an existing framework considering both energy efficiency and task performance to the evaluation of zero-shot generative models for ABS. Overall, our results show that modern language models, both big and small, can effectively tackle ABS for Climate Change reports but more research is needed when we frame the problem as a Retrieval Augmented Generation (RAG) problem and our work and dataset will help foster efforts in this direction. 
</p> </div> </dd> <dt> <a name='item290'>[290]</a> <a href ="/abs/2411.14275" title="Abstract" id="2411.14275"> arXiv:2411.14275 </a> [<a href="/pdf/2411.14275" title="Download PDF" id="pdf-2411.14275" aria-labelledby="pdf-2411.14275">pdf</a>, <a href="https://arxiv.org/html/2411.14275v1" title="View HTML" id="html-2411.14275" aria-labelledby="html-2411.14275" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14275" title="Other formats" id="oth-2411.14275" aria-labelledby="oth-2411.14275">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Exploring the Impact of Quizzes Interleaved with Write-Code Tasks in Elementary-Level Visual Programming </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ghosh,+A">Ahana Ghosh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Malva,+L">Liina Malva</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gotovos,+A">Alkis Gotovos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hooshyar,+D">Danial Hooshyar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Singla,+A">Adish Singla</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Preprint of the SIGCSE'25 paper </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computers and Society (cs.CY)</span> </div> <p class='mathjax'> We explore the role of quizzes in elementary visual programming domains popularly used for K-8 computing education. Prior work has studied various quiz types, such as fill-in-the-gap write-code questions. However, the overall impact of these quizzes is unclear: studies often show utility in the learning phase when enhanced with quizzes, though limited transfer of utility in the post-learning phase. 
In this paper, we aim to better understand the impact of different quiz types and whether quizzes focusing on diverse skills (e.g., code debugging and task design) would have higher utility. We design a study with Hour of Code: Maze Challenge by <a href="http://code.org" rel="external noopener nofollow" class="link-external link-http">this http URL</a> as the base curriculum, interleaved with different quiz types. Specifically, we examine two learning groups: (i) HoC-ACE with diverse quizzes including solution tracing, code debugging, code equivalence, and task design; (ii) HoC-Fill with simple quizzes on solution finding. We conducted a large-scale study with 405 students in grades 6--7. Our results highlight that the curriculum enhanced with richer quizzes led to higher utility during the post-learning phase. </p> </div> </dd> <dt> <a name='item291'>[291]</a> <a href ="/abs/2411.14276" title="Abstract" id="2411.14276"> arXiv:2411.14276 </a> [<a href="/pdf/2411.14276" title="Download PDF" id="pdf-2411.14276" aria-labelledby="pdf-2411.14276">pdf</a>, <a href="https://arxiv.org/html/2411.14276v1" title="View HTML" id="html-2411.14276" aria-labelledby="html-2411.14276" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14276" title="Other formats" id="oth-2411.14276" aria-labelledby="oth-2411.14276">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A $k^{\frac{q}{q-2}}$ Lower Bound for Odd Query Locally Decodable Codes from Bipartite Kikuchi Graphs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Janzer,+O">Oliver Janzer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Manohar,+P">Peter Manohar</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computational Complexity (cs.CC)</span>; Information Theory (cs.IT) </div> <p class='mathjax'> A code $C \colon \{0,1\}^k 
\to \{0,1\}^n$ is a $q$-query locally decodable code ($q$-LDC) if one can recover any chosen bit $b_i$ of the message $b \in \{0,1\}^k$ with good confidence by querying a corrupted string $\tilde{x}$ of the codeword $x = C(b)$ in at most $q$ coordinates. For $2$ queries, the Hadamard code is a $2$-LDC of length $n = 2^k$, and this code is in fact essentially optimal. For $q \geq 3$, there is a large gap in our understanding: the best constructions achieve $n = \exp(k^{o(1)})$, while prior to the recent work of [AGKM23], the best lower bounds were $n \geq \tilde{\Omega}(k^{\frac{q}{q-2}})$ for $q$ even and $n \geq \tilde{\Omega}(k^{\frac{q+1}{q-1}})$ for $q$ odd. <br>The recent work of [AGKM23] used spectral methods to prove a lower bound of $n \geq \tilde{\Omega}(k^3)$ for $q = 3$, thus achieving the "$k^{\frac{q}{q-2}}$ bound" for an odd value of $q$. However, their proof does not extend to any odd $q \geq 5$. In this paper, we prove a $q$-LDC lower bound of $n \geq \tilde{\Omega}(k^{\frac{q}{q-2}})$ for any odd $q$. Our key technical idea is the use of an imbalanced bipartite Kikuchi graph, which gives a simpler method to analyze spectral refutations of odd arity XOR without using the standard "Cauchy-Schwarz trick", a trick that typically produces random matrices with correlated entries and makes the analysis for odd arity XOR significantly more complicated than even arity XOR. 
</p> </div> </dd> <dt> <a name='item292'>[292]</a> <a href ="/abs/2411.14277" title="Abstract" id="2411.14277"> arXiv:2411.14277 </a> [<a href="/pdf/2411.14277" title="Download PDF" id="pdf-2411.14277" aria-labelledby="pdf-2411.14277">pdf</a>, <a href="https://arxiv.org/html/2411.14277v1" title="View HTML" id="html-2411.14277" aria-labelledby="html-2411.14277" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14277" title="Other formats" id="oth-2411.14277" aria-labelledby="oth-2411.14277">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Neuro-Symbolic Query Optimization in Knowledge Graphs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Acosta,+M">Maribel Acosta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qin,+C">Chang Qin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schwabe,+T">Tim Schwabe</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> This chapter delves into the emerging field of neuro-symbolic query optimization for knowledge graphs (KGs), presenting a comprehensive exploration of how neural and symbolic techniques can be integrated to enhance query processing. Traditional query optimizers in knowledge graphs rely heavily on symbolic methods, utilizing dataset summaries, statistics, and cost models to select efficient execution plans. However, these approaches often suffer from misestimations and inaccuracies, particularly when dealing with complex queries or large-scale datasets. Recent advancements have introduced neural models, which capture non-linear aspects of query optimization, offering promising alternatives to purely symbolic methods. 
In this chapter, we introduce neuro-symbolic query optimizers, a novel approach that combines the strengths of symbolic reasoning with the adaptability of neural computation. We discuss the architecture of these hybrid systems, highlighting the interplay between neural and symbolic components to improve the optimizer's ability to navigate the search space and produce efficient execution plans. Additionally, the chapter reviews existing neural components tailored for optimizing queries over knowledge graphs and examines the limitations and challenges in deploying neuro-symbolic query optimizers in real-world environments. </p> </div> </dd> <dt> <a name='item293'>[293]</a> <a href ="/abs/2411.14278" title="Abstract" id="2411.14278"> arXiv:2411.14278 </a> [<a href="/pdf/2411.14278" title="Download PDF" id="pdf-2411.14278" aria-labelledby="pdf-2411.14278">pdf</a>, <a href="https://arxiv.org/html/2411.14278v1" title="View HTML" id="html-2411.14278" aria-labelledby="html-2411.14278" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14278" title="Other formats" id="oth-2411.14278" aria-labelledby="oth-2411.14278">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Adaptive Anomaly Detection for Identifying Attacks in Cyber-Physical Systems: A Systematic Literature Review </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Moriano,+P">Pablo Moriano</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hespeler,+S+C">Steven C. 
Hespeler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+M">Mingyan Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mahbub,+M">Maria Mahbub</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 35 pages, 4 figures, 6 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span> </div> <p class='mathjax'> Modern cyberattacks in cyber-physical systems (CPS) rapidly evolve and cannot be deterred effectively with most current methods which focus on characterizing past threats. Adaptive anomaly detection (AAD) is among the most promising techniques to detect evolving cyberattacks focused on fast data processing and model adaptation. AAD has been researched in the literature extensively; however, to the best of our knowledge, our work is the first systematic literature review (SLR) on the current research within this field. We present a comprehensive SLR, gathering 397 relevant papers and systematically analyzing 65 of them (47 research and 18 survey papers) on AAD in CPS studies from 2013 to 2023 (November). We introduce a novel taxonomy considering attack types, CPS application, learning paradigm, data management, and algorithms. Our analysis indicates, among other findings, that reviewed works focused on a single aspect of adaptation (either data processing or model adaptation) but rarely on both at the same time. We aim to help researchers to advance the state of the art and help practitioners to become familiar with recent progress in this field. We identify the limitations of the state of the art and provide recommendations for future research directions. 
</p> </div> </dd> <dt> <a name='item294'>[294]</a> <a href ="/abs/2411.14279" title="Abstract" id="2411.14279"> arXiv:2411.14279 </a> [<a href="/pdf/2411.14279" title="Download PDF" id="pdf-2411.14279" aria-labelledby="pdf-2411.14279">pdf</a>, <a href="/format/2411.14279" title="Other formats" id="oth-2411.14279" aria-labelledby="oth-2411.14279">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Looking Beyond Text: Reducing Language bias in Large Vision-Language Models via Multimodal Dual-Attention and Soft-Image Guidance </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+H">Haozhe Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Si,+S">Shuzheng Si</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+L">Liang Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yichi Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+M">Maosong Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+M">Mingjia Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+B">Baobao Chang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 19 pages, 12 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> Large vision-language models (LVLMs) have achieved impressive results in various vision-language tasks. However, despite showing promising performance, LVLMs suffer from hallucinations caused by language bias, leading to diminished focus on images and ineffective visual comprehension. We identify two primary reasons for this bias: 1. Different scales of training data between the pretraining stage of LLM and multimodal alignment stage. 
2. The learned inference bias due to short-term dependency of text data. Therefore, we propose LACING, a systemic framework designed to address the language bias of LVLMs with muLtimodal duAl-attention meChanIsm (MDA) aNd soft-image Guidance (IFG). Specifically, MDA introduces a parallel dual-attention mechanism that enhances the integration of visual inputs across the model. IFG introduces a learnable soft visual prompt during training and inference to replace visual inputs, designed to compel LVLMs to prioritize text inputs. Then, IFG further proposes a novel decoding strategy using the soft visual prompt to mitigate the model's over-reliance on adjacent text inputs. Comprehensive experiments demonstrate that our method effectively debiases LVLMs from their language bias, enhancing visual comprehension and reducing hallucinations without requiring additional training resources or data. The code and model are available at <a href="https://lacing-lvlm.github.io" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item295'>[295]</a> <a href ="/abs/2411.14280" title="Abstract" id="2411.14280"> arXiv:2411.14280 </a> [<a href="/pdf/2411.14280" title="Download PDF" id="pdf-2411.14280" aria-labelledby="pdf-2411.14280">pdf</a>, <a href="https://arxiv.org/html/2411.14280v1" title="View HTML" id="html-2411.14280" aria-labelledby="html-2411.14280" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14280" title="Other formats" id="oth-2411.14280" aria-labelledby="oth-2411.14280">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> EasyHOI: Unleashing the Power of Large Models for Reconstructing Hand-Object Interactions in the Wild </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yumeng Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Long,+X">Xiaoxiao Long</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Z">Zemin Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yuan Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Habermann,+M">Marc Habermann</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Theobalt,+C">Christian Theobalt</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+Y">Yuexin Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+W">Wenping Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Project page: <a href="https://lym29.github.io/EasyHOI-page/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Our work aims to reconstruct hand-object interactions from a single-view image, which is a fundamental but 
ill-posed task. Unlike methods that reconstruct from videos, multi-view images, or predefined 3D templates, single-view reconstruction faces significant challenges due to inherent ambiguities and occlusions. These challenges are further amplified by the diverse nature of hand poses and the vast variety of object shapes and sizes. Our key insight is that current foundational models for segmentation, inpainting, and 3D reconstruction robustly generalize to in-the-wild images, which could provide strong visual and geometric priors for reconstructing hand-object interactions. Specifically, given a single image, we first design a novel pipeline to estimate the underlying hand pose and object shape using off-the-shelf large models. Furthermore, with the initial reconstruction, we employ a prior-guided optimization scheme, which optimizes hand pose to comply with 3D physical constraints and the 2D input image content. We perform experiments across several datasets and show that our method consistently outperforms baselines and faithfully reconstructs a diverse set of hand-object interactions. 
Here is the link of our project page: <a href="https://lym29.github.io/EasyHOI-page/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item296'>[296]</a> <a href ="/abs/2411.14281" title="Abstract" id="2411.14281"> arXiv:2411.14281 </a> [<a href="/pdf/2411.14281" title="Download PDF" id="pdf-2411.14281" aria-labelledby="pdf-2411.14281">pdf</a>, <a href="https://arxiv.org/html/2411.14281v1" title="View HTML" id="html-2411.14281" aria-labelledby="html-2411.14281" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14281" title="Other formats" id="oth-2411.14281" aria-labelledby="oth-2411.14281">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Q-CSM: Q-Learning-based Cognitive Service Management in Heterogeneous IoT Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Duran,+K">Kubra Duran</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ozdem,+M">Mehmet Ozdem</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gursu,+K">Kerem Gursu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Canberk,+B">Berk Canberk</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Networking and Internet Architecture (cs.NI)</span> </div> <p class='mathjax'> The dramatic increase in the number of smart services and their diversity poses a significant challenge in Internet of Things (IoT) networks: heterogeneity. This causes significant quality of service (QoS) degradation in IoT networks. In addition, the constraints of IoT devices in terms of computational capability and energy resources add extra complexity to this. However, the current studies remain insufficient to solve this problem due to the lack of cognitive action recommendations. 
Therefore, we propose a Q-learning-based Cognitive Service Management framework called Q-CSM. In this framework, we first design an IoT Agent Manager to handle the heterogeneity in data formats. After that, we design a Q-learning-based recommendation engine to optimize the devices' lifetime according to the predicted QoS behaviour of the changing IoT network scenarios. We apply the proposed cognitive management to a smart city scenario consisting of three specific services: wind turbines, solar panels, and transportation systems. We note that our proposed cognitive method achieves 38.7% faster response time to the dynamical IoT changes in topology. Furthermore, the proposed framework achieves 19.8% longer lifetime on average for constrained IoT devices thanks to its Q-learning-based cognitive decision capability. In addition, we explore the most successive learning rate value in the Q-learning run through the exploration and exploitation phases. </p> </div> </dd> <dt> <a name='item297'>[297]</a> <a href ="/abs/2411.14283" title="Abstract" id="2411.14283"> arXiv:2411.14283 </a> [<a href="/pdf/2411.14283" title="Download PDF" id="pdf-2411.14283" aria-labelledby="pdf-2411.14283">pdf</a>, <a href="https://arxiv.org/html/2411.14283v1" title="View HTML" id="html-2411.14283" aria-labelledby="html-2411.14283" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14283" title="Other formats" id="oth-2411.14283" aria-labelledby="oth-2411.14283">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CAIP: Detecting Router Misconfigurations with Context-Aware Iterative Prompting of LLMs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+X">Xi Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gember-Jacobson,+A">Aaron Gember-Jacobson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Feamster,+N">Nick 
Feamster</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 12 pages, 4 tables, 5 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Networking and Internet Architecture (cs.NI)</span> </div> <p class='mathjax'> Model checkers and consistency checkers detect critical errors in router configurations, but these tools require significant manual effort to develop and maintain. LLM-based Q&A models have emerged as a promising alternative, allowing users to query partitions of configurations through prompts and receive answers based on learned patterns, thanks to transformer models pre-trained on vast datasets that provide generic configuration context for interpreting router configurations. Yet, current methods of partition-based prompting often do not provide enough network-specific context from the actual configurations to enable accurate inference. We introduce a Context-Aware Iterative Prompting (CAIP) framework that automates network-specific context extraction and optimizes LLM prompts for more precise router misconfiguration detection. CAIP addresses three challenges: (1) efficiently mining relevant context from complex configuration files, (2) accurately distinguishing between pre-defined and user-defined parameter values to prevent irrelevant context from being introduced, and (3) managing prompt context overload with iterative, guided interactions with the model. Our evaluations on synthetic and real-world configurations show that CAIP improves misconfiguration detection accuracy by more than 30% compared to partition-based LLM approaches, model checkers, and consistency checkers, uncovering over 20 previously undetected misconfigurations in real-world configurations. 
</p> </div> </dd> <dt> <a name='item298'>[298]</a> <a href ="/abs/2411.14284" title="Abstract" id="2411.14284"> arXiv:2411.14284 </a> [<a href="/pdf/2411.14284" title="Download PDF" id="pdf-2411.14284" aria-labelledby="pdf-2411.14284">pdf</a>, <a href="https://arxiv.org/html/2411.14284v1" title="View HTML" id="html-2411.14284" aria-labelledby="html-2411.14284" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14284" title="Other formats" id="oth-2411.14284" aria-labelledby="oth-2411.14284">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Algebras for Deterministic Computation Are Inherently Incomplete </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cate,+B+t">Balder ten Cate</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kapp%C3%A9,+T">Tobias Kappé</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Programming Languages (cs.PL)</span> </div> <p class='mathjax'> Kleene Algebra with Tests (KAT) provides an elegant algebraic framework for describing non-deterministic finite-state computations. Using a small finite set of non-deterministic programming constructs (sequencing, non-deterministic choice, and iteration) it is able to express all non-deterministic finite state control flow over a finite set of primitives. It is natural to ask whether there exists a similar finite set of constructs that can capture all deterministic computation. We show that this is not the case. More precisely, the deterministic fragment of KAT is not generated by any finite set of regular control flow operations. This generalizes earlier results about the expressivity of the traditional control flow operations, i.e., sequential composition, if-then-else and while. 
</p> </div> </dd> <dt> <a name='item299'>[299]</a> <a href ="/abs/2411.14288" title="Abstract" id="2411.14288"> arXiv:2411.14288 </a> [<a href="/pdf/2411.14288" title="Download PDF" id="pdf-2411.14288" aria-labelledby="pdf-2411.14288">pdf</a>, <a href="https://arxiv.org/html/2411.14288v1" title="View HTML" id="html-2411.14288" aria-labelledby="html-2411.14288" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14288" title="Other formats" id="oth-2411.14288" aria-labelledby="oth-2411.14288">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On the Sample Complexity of One Hidden Layer Networks with Equivariance, Locality and Weight Sharing </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Behboodi,+A">Arash Behboodi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cesa,+G">Gabriele Cesa</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Statistics Theory (math.ST); Machine Learning (stat.ML) </div> <p class='mathjax'> Weight sharing, equivariance, and local filters, as in convolutional neural networks, are believed to contribute to the sample efficiency of neural networks. However, it is not clear how each one of these design choices contributes to the generalization error. Through the lens of statistical learning theory, we aim to provide an insight into this question by characterizing the relative impact of each choice on the sample complexity. We obtain lower and upper sample complexity bounds for a class of single hidden layer networks. It is shown that the gain of equivariance is directly manifested in the bound, while getting a similar increase for weight sharing depends on the sharing mechanism. Among our results, we obtain a completely dimension-free bound for equivariant networks for a class of pooling operations. 
We show that the bound depends merely on the norm of filters, which is tighter than using the spectral norm of the respective matrix. We also characterize the trade-off in sample complexity between the parametrization of filters in spatial and frequency domains, particularly when spatial filters are localized as in vanilla convolutional neural networks. </p> </div> </dd> <dt> <a name='item300'>[300]</a> <a href ="/abs/2411.14290" title="Abstract" id="2411.14290"> arXiv:2411.14290 </a> [<a href="/pdf/2411.14290" title="Download PDF" id="pdf-2411.14290" aria-labelledby="pdf-2411.14290">pdf</a>, <a href="https://arxiv.org/html/2411.14290v1" title="View HTML" id="html-2411.14290" aria-labelledby="html-2411.14290" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14290" title="Other formats" id="oth-2411.14290" aria-labelledby="oth-2411.14290">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Soft Manipulation Surface With Reduced Actuator Density For Heterogeneous Object Manipulation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ingle,+P">Pratik Ingle</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=St%C3%B8y,+K">Kasper Støy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fai%C3%B1a,+A">Andres Faiña</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Object manipulation in robotics faces challenges due to diverse object shapes, sizes, and fragility. Gripper-based methods offer precision and low degrees of freedom (DOF) but the gripper limits the kind of objects to grasp. On the other hand, surface-based approaches provide flexibility for handling fragile and heterogeneous objects but require numerous actuators, increasing complexity. 
We propose new manipulation hardware that utilizes equally spaced linear actuators placed vertically and connected by a soft surface. In this setup, object manipulation occurs on the soft surface through coordinated movements of the surrounding actuators. This approach requires fewer actuators to cover a large manipulation area, offering a cost-effective solution with a lower DOF compared to dense actuator arrays. It also effectively handles heterogeneous objects of varying shapes and weights, even when they are significantly smaller than the distance between actuators. This method is particularly suitable for managing highly fragile objects in the food industry. </p> </div> </dd> <dt> <a name='item301'>[301]</a> <a href ="/abs/2411.14295" title="Abstract" id="2411.14295"> arXiv:2411.14295 </a> [<a href="/pdf/2411.14295" title="Download PDF" id="pdf-2411.14295" aria-labelledby="pdf-2411.14295">pdf</a>, <a href="https://arxiv.org/html/2411.14295v1" title="View HTML" id="html-2411.14295" aria-labelledby="html-2411.14295" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14295" title="Other formats" id="oth-2411.14295" aria-labelledby="oth-2411.14295">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> StereoCrafter-Zero: Zero-Shot Stereo Video Generation with Noisy Restart </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+J">Jian Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Q">Qian Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Z">Zhenyu Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wonka,+P">Peter Wonka</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Generating high-quality stereo videos that mimic human binocular 
vision requires maintaining consistent depth perception and temporal coherence across frames. While diffusion models have advanced image and video synthesis, generating high-quality stereo videos remains challenging due to the difficulty of maintaining consistent temporal and spatial coherence between left and right views. We introduce StereoCrafter-Zero, a novel framework for zero-shot stereo video generation that leverages video diffusion priors without the need for paired training data. Key innovations include a noisy restart strategy to initialize stereo-aware latents and an iterative refinement process that progressively harmonizes the latent space, addressing issues like temporal flickering and view inconsistencies. Comprehensive evaluations, including quantitative metrics and user studies, demonstrate that StereoCrafter-Zero produces high-quality stereo videos with improved depth consistency and temporal smoothness, even when depth estimations are imperfect. Our framework is robust and adaptable across various diffusion models, setting a new benchmark for zero-shot stereo video generation and enabling more immersive visual experiences. Our code can be found at <a href="https://github.com/shijianjian/StereoCrafter-Zero" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item302'>[302]</a> <a href ="/abs/2411.14296" title="Abstract" id="2411.14296"> arXiv:2411.14296 </a> [<a href="/pdf/2411.14296" title="Download PDF" id="pdf-2411.14296" aria-labelledby="pdf-2411.14296">pdf</a>, <a href="https://arxiv.org/html/2411.14296v1" title="View HTML" id="html-2411.14296" aria-labelledby="html-2411.14296" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14296" title="Other formats" id="oth-2411.14296" aria-labelledby="oth-2411.14296">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Improving Routability Prediction via NAS Using a Smooth One-shot Augmented Predictor </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Arjun">Arjun Sridhar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+C">Chen-Chia Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+J">Junyao Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yiran Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Routability optimization in modern EDA tools has benefited greatly from using machine learning (ML) models. Constructing and optimizing the performance of ML models continues to be a challenge. Neural Architecture Search (NAS) serves as a tool to aid in the construction and improvement of these models. Traditional NAS techniques struggle to perform well on routability prediction as a result of two primary factors. First, the separation between the training objective and the search objective adds noise to the NAS process. Secondly, the increased variance of the search objective further complicates performing NAS. 
We craft a novel NAS technique, coined SOAP-NAS, to address these challenges through novel data augmentation techniques and a novel combination of one-shot and predictor-based NAS. Results show that our technique outperforms existing solutions by 40% closer to the ideal performance measured by ROC-AUC (area under the receiver operating characteristic curve) in DRC hotspot detection. SOAPNet is able to achieve an ROC-AUC of 0.9802 and a query time of only 0.461 ms. </p> </div> </dd> <dt> <a name='item303'>[303]</a> <a href ="/abs/2411.14298" title="Abstract" id="2411.14298"> arXiv:2411.14298 </a> [<a href="/pdf/2411.14298" title="Download PDF" id="pdf-2411.14298" aria-labelledby="pdf-2411.14298">pdf</a>, <a href="/format/2411.14298" title="Other formats" id="oth-2411.14298" aria-labelledby="oth-2411.14298">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Decoding the Meaning of Success on Digital Labor Platforms: Worker-Centered Perspectives </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+P">Pyeonghwa Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Asante-Agyei,+C">Charis Asante-Agyei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Munoz,+I">Isabel Munoz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dunn,+M">Michael Dunn</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sawyer,+S">Steve Sawyer</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 29 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span> </div> <p class='mathjax'> What does work and career success mean for those who secure their work using digital labor platforms? 
Traditional research on success predominantly relies on organizationally-centric benchmarks, such as promotions and income. These measures provide limited insights into the evolving nature of work and careers shaped at the intersection of digital labor platform technologies and workers' evolving perspectives. Drawing on data from a longitudinal study of 108 digital labor platform workers on Upwork, we (1) identify seven dimensions of success indicators that reflect workers' definitions of success in platform-mediated work and careers, (2) delineate three dimensions of digital labor platforms mediating workers' experiences of success and (3) examine the shifting perspectives of these workers relative to success. Based on these findings, we discuss the implications of platform-mediated success in workers' labor experiences, marked by platformic management, standardization, precarity and ongoing evolution. Our discussion intertwines CSCW scholarship with career studies, advancing a more nuanced understanding of the evolving perspectives on success in platform-mediated work and careers. 
</p> </div> </dd> <dt> <a name='item304'>[304]</a> <a href ="/abs/2411.14299" title="Abstract" id="2411.14299"> arXiv:2411.14299 </a> [<a href="/pdf/2411.14299" title="Download PDF" id="pdf-2411.14299" aria-labelledby="pdf-2411.14299">pdf</a>, <a href="https://arxiv.org/html/2411.14299v1" title="View HTML" id="html-2411.14299" aria-labelledby="html-2411.14299" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14299" title="Other formats" id="oth-2411.14299" aria-labelledby="oth-2411.14299">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Auto-SPICE: Leveraging LLMs for Dataset Creation via Automated SPICE Netlist Extraction from Analog Circuit Diagrams </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bhandari,+J">Jitendra Bhandari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bhat,+V">Vineet Bhat</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+Y">Yuheng He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Garg,+S">Siddharth Garg</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rahmani,+H">Hamed Rahmani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Karri,+R">Ramesh Karri</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Hardware Architecture (cs.AR)</span> </div> <p class='mathjax'> Auto-SPICE is the first fully automated framework leveraging large language models (LLMs) to generate Simulation Programs with Integrated Circuit Emphasis (SPICE) netlists. It addresses a long-standing challenge in automating netlist generation for analog circuits within circuit design automation. Automating this workflow could accelerate the creation of finetuned LLMs for analog circuit design and verification. 
We identify key challenges in this automation and evaluate the multi-modal capabilities of state-of-the-art LLMs, particularly GPT-4, to address these issues. We propose a three-step workflow to overcome current limitations: labeling analog circuits, prompt tuning, and netlist verification. This approach aims to create an end-to-end SPICE netlist generator from circuit schematic images, tackling the long-standing hurdle of accurate netlist generation. Our framework demonstrates significant performance improvements, tested on approximately 2,100 schematics of varying complexity. We open-source this solution for community-driven development. </p> </div> </dd> <dt> <a name='item305'>[305]</a> <a href ="/abs/2411.14301" title="Abstract" id="2411.14301"> arXiv:2411.14301 </a> [<a href="/pdf/2411.14301" title="Download PDF" id="pdf-2411.14301" aria-labelledby="pdf-2411.14301">pdf</a>, <a href="/format/2411.14301" title="Other formats" id="oth-2411.14301" aria-labelledby="oth-2411.14301">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sustainability concepts for digital research infrastructures developed through ground-level stakeholder empowerment </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ahrens,+F">Florian Ahrens</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Geatches,+D">Dawn Geatches</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=McCarroll,+N">Niall McCarroll</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Buck,+J">Justin Buck</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lorenzo-Lopez,+A">Alvaro Lorenzo-Lopez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Keshtkar,+H">Hossein Keshtkar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fayyad,+N">Nadine Fayyad</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Hassanloo,+H">Hamidreza Hassanloo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Manika,+D">Danae Manika</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages, 1 figure </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computers and Society (cs.CY)</span> </div> <p class='mathjax'> The UK Research and Innovation Digital Research Infrastructure (DRI) needs to operate sustainably in the future, encompassing its use of energy and resources, and embedded computer hardware carbon emissions. Transition concepts towards less unsustainable operations will inform the future design and operations of DRI. A problem remains that, while the skills and knowledge for solving net zero challenges already exist within the UK's DRI community, the mechanisms for sharing them and enabling behavior change are missing. Without adopting community-driven approaches, individual stakeholders may feel isolated and uncertain about how to play their role in the transition. A research programme was funded to give voice to the ground-level stakeholders of the DRI ecosystem for the co-creation of carbon downshift concepts. This article presents the results of the programme, with the goal to inform a fair and just transition from the ground-level, complementing the top-down interventions of energy efficiency policies and renewable energies integration. A workshop-based innovation method was developed for researching stakeholder recommendations and perspectives on the sustainable transition of the UK's DRI. We find that giving a purposeful voice to the stakeholders for shaping their own future sustainable DRI environment can be achieved by a guided, expert-integrated, interactive and problem-focused workshop series. 
The chosen workshop design is impactful on creating bottom-up agency for climate action by first defining the high-level problems of unsustainability in energy and fossil-fuel consumption, and then connecting them to the ground-level circumstances of DRI stakeholders. This approach to stakeholder management should initiate a sustainable transition that promises to kick-start impactful changes from within communities, adding to high-level efforts from economics, policy, and governance. </p> </div> </dd> <dt> <a name='item306'>[306]</a> <a href ="/abs/2411.14303" title="Abstract" id="2411.14303"> arXiv:2411.14303 </a> [<a href="/pdf/2411.14303" title="Download PDF" id="pdf-2411.14303" aria-labelledby="pdf-2411.14303">pdf</a>, <a href="https://arxiv.org/html/2411.14303v1" title="View HTML" id="html-2411.14303" aria-labelledby="html-2411.14303" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14303" title="Other formats" id="oth-2411.14303" aria-labelledby="oth-2411.14303">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Automated Generation of Code Debugging Exercises </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=P%C4%83durean,+V">Victor-Alexandru Pădurean</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Denny,+P">Paul Denny</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Singla,+A">Adish Singla</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Preprint of the SIGCSE'25 paper </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Debugging is an essential skill when learning to program, yet its instruction and emphasis often vary widely across introductory courses. 
In the era of code-generating large language models (LLMs), the ability for students to reason about code and identify errors is increasingly important. However, students frequently resort to trial-and-error methods to resolve bugs without fully understanding the underlying issues. Developing the ability to identify and hypothesize the cause of bugs is crucial but can be time-consuming to teach effectively through traditional means. This paper introduces BugSpotter, an innovative tool that leverages an LLM to generate buggy code from a problem description and verify the synthesized bugs via a test suite. Students interact with BugSpotter by designing failing test cases, where the buggy code's output differs from the expected result as defined by the problem specification. This not only provides opportunities for students to enhance their debugging skills, but also to practice reading and understanding problem specifications. We deployed BugSpotter in a large classroom setting and compared the debugging exercises it generated to exercises hand-crafted by an instructor for the same problems. We found that the LLM-generated exercises produced by BugSpotter varied in difficulty and were well-matched to the problem specifications. Importantly, the LLM-generated exercises were comparable to those manually created by instructors with respect to student performance, suggesting that BugSpotter could be an effective and efficient aid for learning debugging. 
</p> </div> </dd> <dt> <a name='item307'>[307]</a> <a href ="/abs/2411.14305" title="Abstract" id="2411.14305"> arXiv:2411.14305 </a> [<a href="/pdf/2411.14305" title="Download PDF" id="pdf-2411.14305" aria-labelledby="pdf-2411.14305">pdf</a>, <a href="https://arxiv.org/html/2411.14305v1" title="View HTML" id="html-2411.14305" aria-labelledby="html-2411.14305" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14305" title="Other formats" id="oth-2411.14305" aria-labelledby="oth-2411.14305">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Outlier-robust Mean Estimation near the Breakdown Point via Sum-of-Squares </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+H">Hongjie Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Narayanan,+D">Deepak Narayanan Sridharan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Steurer,+D">David Steurer</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at SODA 2025, 47 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Data Structures and Algorithms (cs.DS)</span>; Machine Learning (cs.LG); Machine Learning (stat.ML) </div> <p class='mathjax'> We revisit the problem of estimating the mean of a high-dimensional distribution in the presence of an $\varepsilon$-fraction of adversarial outliers. <br>When $\varepsilon$ is at most some sufficiently small constant, previous works can achieve optimal error rate efficiently \cite{diakonikolas2018robustly, kothari2018robust}. As $\varepsilon$ approaches the breakdown point $\frac{1}{2}$, all previous algorithms incur either sub-optimal error rates or exponential running time. 
<br>In this paper we give a new analysis of the canonical sum-of-squares program introduced in \cite{kothari2018robust} and show that this program efficiently achieves optimal error rate for all $\varepsilon \in[0,\frac{1}{2})$. The key ingredient for our results is a new identifiability proof for robust mean estimation that focuses on the overlap between the distributions instead of their statistical distance as in previous works. We capture this proof within the sum-of-squares proof system, thus obtaining efficient algorithms using the sum-of-squares proofs to algorithms paradigm \cite{raghavendra2018high}. </p> </div> </dd> <dt> <a name='item308'>[308]</a> <a href ="/abs/2411.14314" title="Abstract" id="2411.14314"> arXiv:2411.14314 </a> [<a href="/pdf/2411.14314" title="Download PDF" id="pdf-2411.14314" aria-labelledby="pdf-2411.14314">pdf</a>, <a href="https://arxiv.org/html/2411.14314v1" title="View HTML" id="html-2411.14314" aria-labelledby="html-2411.14314" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14314" title="Other formats" id="oth-2411.14314" aria-labelledby="oth-2411.14314">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Switching Graph Matrix Norm Bounds: from i.i.d. to Random Regular Graphs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+J">Jeff Xu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computational Complexity (cs.CC)</span> </div> <p class='mathjax'> In this work, we give novel spectral norm bounds for graph matrix on inputs being random regular graphs. Graph matrix is a family of random matrices with entries given by polynomial functions of the underlying input. These matrices have been known to be the backbone for the analysis of various average-case algorithms and hardness. 
Previous investigations of such matrices are largely restricted to the Erdős-Rényi model, and tight matrix norm bounds on regular graphs are only known for specific examples. We unite these two lines of investigations, and give the first result departing from the Erdős-Rényi setting in the full generality of graph matrices. We believe our norm bound result would enable a simple transfer of spectral analysis for average-case algorithms and hardness between these two distributions of random graphs. <br>As an application of our spectral norm bounds, we show that higher-degree Sum-of-Squares lower bounds for the independent set problem on Erdős-Rényi random graphs can be switched into lower bounds on random $d$-regular graphs. Our result is the first to address the general open question of analyzing higher-degree Sum-of-Squares on random regular graphs. </p> </div> </dd> <dt> <a name='item309'>[309]</a> <a href ="/abs/2411.14315" title="Abstract" id="2411.14315"> arXiv:2411.14315 </a> [<a href="/pdf/2411.14315" title="Download PDF" id="pdf-2411.14315" aria-labelledby="pdf-2411.14315">pdf</a>, <a href="https://arxiv.org/html/2411.14315v1" title="View HTML" id="html-2411.14315" aria-labelledby="html-2411.14315" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14315" title="Other formats" id="oth-2411.14315" aria-labelledby="oth-2411.14315">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Introducing a Harmonic Balance Navier-Stokes Finite Element Solver to Accelerate Cardiovascular Simulations </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Jia,+D">Dongjie Jia</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Esmaily,+M">Mahdi Esmaily</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span>; Fluid Dynamics (physics.flu-dyn) 
</div> <p class='mathjax'> The adoption of cardiovascular simulations for diagnosis and surgical planning on a patient-specific basis requires the development of faster methods than the existing state-of-the-art techniques. To address this need, we leverage the periodic nature of these flows to accurately capture their time-dependence using spectral discretization. Owing to the reduced size of the discrete problem, the resulting approach, known as the harmonic balance method, significantly lowers the solution cost when compared against the conventional time marching methods. This study describes a stabilized finite element implementation of the harmonic balance method that targets the simulation of physically-stable time-periodic flows. That stabilized method is based on the Galerkin/least-squares formulation that permits stable solution in convection-dominant flows and convenient use of the same interpolation functions for velocity and pressure. We test this solver against its equivalent time marching method using three common physiological cases where blood flow is modeled in a Glenn operation, a cerebral artery, and a left main coronary artery. Using the conventional time marching solver, simulating these cases takes more than ten hours. That cost is reduced by up to two orders of magnitude when the proposed harmonic balance solver is utilized, where a solution is produced in approximately 30 minutes. We show that that solution is in excellent agreement with the conventional solvers when the number of modes is sufficiently large to accurately represent the imposed boundary conditions. 
</p> </div> </dd> <dt> <a name='item310'>[310]</a> <a href ="/abs/2411.14318" title="Abstract" id="2411.14318"> arXiv:2411.14318 </a> [<a href="/pdf/2411.14318" title="Download PDF" id="pdf-2411.14318" aria-labelledby="pdf-2411.14318">pdf</a>, <a href="https://arxiv.org/html/2411.14318v1" title="View HTML" id="html-2411.14318" aria-labelledby="html-2411.14318" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14318" title="Other formats" id="oth-2411.14318" aria-labelledby="oth-2411.14318">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Velocitune: A Velocity-based Dynamic Domain Reweighting Method for Continual Pre-training </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+Z">Zheheng Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+X">Xin Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+X">Xiao Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+H">Haoling Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gong,+Y">Yeyun Gong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qi,+C">Chen Qi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+P">Peng Cheng</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Work in progress </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> It is well-known that a diverse corpus is critical for training large language models, which are typically constructed from a mixture of various domains. In general, previous efforts resort to sampling training data from different domains with static proportions, as well as adjusting data proportions during training. 
However, few methods have addressed the complexities of domain-adaptive continual pre-training. To fill this gap, we propose Velocitune, a novel framework that dynamically assesses learning velocity and adjusts data proportions accordingly, favoring slower-learning domains while shunning faster-learning ones, which is guided by a scaling law to indicate the desired learning goal for each domain with less associated cost. To evaluate the effectiveness of Velocitune, we conduct experiments in a reasoning-focused dataset with CodeLlama, as well as in a corpus specialised for system command generation with Llama3 and Mistral. Velocitune achieves performance gains in both math and code reasoning tasks and command-line generation benchmarks. Further analysis reveals that key factors driving Velocitune's effectiveness include target loss prediction and data ordering. </p> </div> </dd> <dt> <a name='item311'>[311]</a> <a href ="/abs/2411.14319" title="Abstract" id="2411.14319"> arXiv:2411.14319 </a> [<a href="/pdf/2411.14319" title="Download PDF" id="pdf-2411.14319" aria-labelledby="pdf-2411.14319">pdf</a>, <a href="/format/2411.14319" title="Other formats" id="oth-2411.14319" aria-labelledby="oth-2411.14319">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Iteration-Free Cooperative Distributed MPC through Multiparametric Programming </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Saini,+R+S+T">Radhe S. T. Saini</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Brahmbhatt,+P+R">Parth R. Brahmbhatt</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Avraamidou,+S">Styliani Avraamidou</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Ganesh,+H+S">Hari S. 
Ganesh</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> Cooperative Distributed Model Predictive Control (DiMPC) architecture employs local MPC controllers to control different subsystems, exchanging information with each other through an iterative procedure to enhance overall control performance compared to the decentralized architecture. However, this method can result in high communication between the controllers and computational costs. In this work, the amount of information exchanged and the computational costs of DiMPC are reduced significantly by developing novel iteration-free solution algorithms based on multiparametric (mp) programming. These algorithms replace the iterative procedure with simultaneous solutions of explicit mpDiMPC control law functions. The reduced communication among local controllers decreases system latency, which is crucial for real-time control applications. The effectiveness of the proposed iteration-free mpDiMPC algorithms is demonstrated through comprehensive numerical simulations involving groups of coupled linear subsystems, which are interconnected through their inputs and a cooperative plant-wide cost function. 
</p> </div> </dd> <dt> <a name='item312'>[312]</a> <a href ="/abs/2411.14321" title="Abstract" id="2411.14321"> arXiv:2411.14321 </a> [<a href="/pdf/2411.14321" title="Download PDF" id="pdf-2411.14321" aria-labelledby="pdf-2411.14321">pdf</a>, <a href="https://arxiv.org/html/2411.14321v1" title="View HTML" id="html-2411.14321" aria-labelledby="html-2411.14321" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14321" title="Other formats" id="oth-2411.14321" aria-labelledby="oth-2411.14321">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Continual Learning and Lifting of Koopman Dynamics for Linear Control of Legged Robots </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+F">Feihan Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Abuduweili,+A">Abulikemu Abuduweili</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+Y">Yifan Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+R">Rui Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+W">Weiye Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+C">Changliu Liu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> The control of legged robots, particularly humanoid and quadruped robots, presents significant challenges due to their high-dimensional and nonlinear dynamics. While linear systems can be effectively controlled using methods like Model Predictive Control (MPC), the control of nonlinear systems remains complex. One promising solution is the Koopman Operator, which approximates nonlinear dynamics with a linear model, enabling the use of proven linear control techniques. 
However, achieving accurate linearization through data-driven methods is difficult due to issues like approximation error, domain shifts, and the limitations of fixed linear state-space representations. These challenges restrict the scalability of Koopman-based approaches. This paper addresses these challenges by proposing a continual learning algorithm designed to iteratively refine Koopman dynamics for high-dimensional legged robots. The key idea is to progressively expand the dataset and latent space dimension, enabling the learned Koopman dynamics to converge towards accurate approximations of the true system dynamics. Theoretical analysis shows that the linear approximation error of our method converges monotonically. Experimental results demonstrate that our method achieves high control performance on robots like Unitree G1/H1/A1/Go2 and ANYmal D, across various terrains using simple linear MPC controllers. This work is the first to successfully apply linearized Koopman dynamics for locomotion control of high-dimensional legged robots, enabling a scalable model-based control solution. 
</p> </div> </dd> <dt> <a name='item313'>[313]</a> <a href ="/abs/2411.14322" title="Abstract" id="2411.14322"> arXiv:2411.14322 </a> [<a href="/pdf/2411.14322" title="Download PDF" id="pdf-2411.14322" aria-labelledby="pdf-2411.14322">pdf</a>, <a href="https://arxiv.org/html/2411.14322v1" title="View HTML" id="html-2411.14322" aria-labelledby="html-2411.14322" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14322" title="Other formats" id="oth-2411.14322" aria-labelledby="oth-2411.14322">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SplatR : Experience Goal Visual Rearrangement with 3D Gaussian Splatting and Dense Feature Matching </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=S,+A+P">Arjun P S</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Melnik,+A">Andrew Melnik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nandi,+G+C">Gora Chand Nandi</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Experience Goal Visual Rearrangement task stands as a foundational challenge within Embodied AI, requiring an agent to construct a robust world model that accurately captures the goal state. The agent uses this world model to restore a shuffled scene to its original configuration, making an accurate representation of the world essential for successfully completing the task. In this work, we present a novel framework that leverages on 3D Gaussian Splatting as a 3D scene representation for experience goal visual rearrangement task. Recent advances in volumetric scene representation like 3D Gaussian Splatting, offer fast rendering of high quality and photo-realistic novel views. 
Our approach enables the agent to have consistent views of the current and the goal setting of the rearrangement task, which enables the agent to directly compare the goal state and the shuffled state of the world in image space. To compare these views, we propose to use a dense feature matching method with visual features extracted from a foundation model, leveraging its advantages of a more universal feature representation, which facilitates robustness, and generalization. We validate our approach on the AI2-THOR rearrangement challenge benchmark and demonstrate improvements over the current state of the art methods </p> </div> </dd> <dt> <a name='item314'>[314]</a> <a href ="/abs/2411.14330" title="Abstract" id="2411.14330"> arXiv:2411.14330 </a> [<a href="/pdf/2411.14330" title="Download PDF" id="pdf-2411.14330" aria-labelledby="pdf-2411.14330">pdf</a>, <a href="https://arxiv.org/html/2411.14330v1" title="View HTML" id="html-2411.14330" aria-labelledby="html-2411.14330" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14330" title="Other formats" id="oth-2411.14330" aria-labelledby="oth-2411.14330">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Datalog with First-Class Facts </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gilray,+T">Thomas Gilray</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sahebolamri,+A">Arash Sahebolamri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+Y">Yihao Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kunapaneni,+S">Sowmith Kunapaneni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kumar,+S">Sidharth Kumar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Micinski,+K">Kristopher Micinski</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> arXiv admin note: text overlap 
with <a href="https://arxiv.org/abs/2211.11573" data-arxiv-id="2211.11573" class="link-https">arXiv:2211.11573</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span>; Programming Languages (cs.PL) </div> <p class='mathjax'> Datalog is a popular logic programming language for deductive reasoning tasks in a wide array of applications, including business analytics, program analysis, and ontological reasoning. However, Datalog's restriction to flat facts over atomic constants leads to challenges in working with tree-structured data, such as derivation trees or abstract syntax trees. To ameliorate Datalog's restrictions, popular extensions of Datalog support features such as existential quantification in rule heads (Datalog$^\pm$, Datalog$^\exists$) or algebraic data types (Soufflé). Unfortunately, these are imperfect solutions for reasoning over structured and recursive data types, with general existentials leading to complex implementations requiring unification, and ADTs unable to trigger rule evaluation and failing to support efficient indexing. <br>We present DL$^{\exists!}$, a Datalog with first-class facts, wherein every fact is identified with a Skolem term unique to the fact. We show that this restriction offers an attractive price point for Datalog-based reasoning over tree-shaped data, demonstrating its application to databases, artificial intelligence, and programming languages. We implemented DL$^{\exists!}$ as a system Slog, which leverages the uniqueness restriction of DL$^{\exists!}$ to enable a communication-avoiding, massively-parallel implementation built on MPI. We show that Slog outperforms leading systems (Nemo, Vlog, RDFox, and Soufflé) on a variety of benchmarks, with the potential to scale to thousands of threads. 
</p> </div> </dd> <dt> <a name='item315'>[315]</a> <a href ="/abs/2411.14331" title="Abstract" id="2411.14331"> arXiv:2411.14331 </a> [<a href="/pdf/2411.14331" title="Download PDF" id="pdf-2411.14331" aria-labelledby="pdf-2411.14331">pdf</a>, <a href="https://arxiv.org/html/2411.14331v1" title="View HTML" id="html-2411.14331" aria-labelledby="html-2411.14331" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14331" title="Other formats" id="oth-2411.14331" aria-labelledby="oth-2411.14331">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Data Formats in Analytical DBMSs: Performance Trade-offs and Future Directions </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+C">Chunwei Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pavlenko,+A">Anna Pavlenko</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Interlandi,+M">Matteo Interlandi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Haynes,+B">Brandon Haynes</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span> </div> <p class='mathjax'> This paper evaluates the suitability of Apache Arrow, Parquet, and ORC as formats for subsumption in an analytical DBMS. We systematically identify and explore the high-level features that are important to support efficient querying in modern OLAP DBMSs and evaluate the ability of each format to support these features. We find that each format has trade-offs that make it more or less suitable for use as a format in a DBMS and identify opportunities to more holistically co-design a unified in-memory and on-disk data representation. Notably, for certain popular machine learning tasks, none of these formats perform optimally, highlighting significant opportunities for advancing format design. 
Our hope is that this study can be used as a guide for system developers designing and using these formats, as well as provide the community with directions to pursue for improving these common open formats. </p> </div> </dd> <dt> <a name='item316'>[316]</a> <a href ="/abs/2411.14332" title="Abstract" id="2411.14332"> arXiv:2411.14332 </a> [<a href="/pdf/2411.14332" title="Download PDF" id="pdf-2411.14332" aria-labelledby="pdf-2411.14332">pdf</a>, <a href="https://arxiv.org/html/2411.14332v1" title="View HTML" id="html-2411.14332" aria-labelledby="html-2411.14332" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14332" title="Other formats" id="oth-2411.14332" aria-labelledby="oth-2411.14332">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Continuous nonlinear adaptive experimental design with gradient flow </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Jin,+R">Ruhui Jin</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Li,+Q">Qin Li</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Mussmann,+S+O">Stephen O. Mussmann</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Wright,+S+J">Stephen J. Wright</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span> </div> <p class='mathjax'> Identifying valuable measurements is one of the main challenges in computational inverse problems, often framed as the optimal experimental design (OED) problem. In this paper, we investigate nonlinear OED within a continuously-indexed design space. This is in contrast to the traditional approaches on selecting experiments from a finite measurement set. This formulation better reflects practical scenarios where measurements are taken continuously across spatial or temporal domains. 
However, optimizing over a continuously-indexed space introduces computational challenges. To address these, we employ gradient flow and optimal transport techniques, complemented by adaptive strategy for interactive optimization. Numerical results on the Lorenz 63 system and Schrödinger equation demonstrate that our solver identifies valuable measurements and achieves improved reconstruction of unknown parameters in inverse problems. </p> </div> </dd> <dt> <a name='item317'>[317]</a> <a href ="/abs/2411.14333" title="Abstract" id="2411.14333"> arXiv:2411.14333 </a> [<a href="/pdf/2411.14333" title="Download PDF" id="pdf-2411.14333" aria-labelledby="pdf-2411.14333">pdf</a>, <a href="https://arxiv.org/html/2411.14333v1" title="View HTML" id="html-2411.14333" aria-labelledby="html-2411.14333" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14333" title="Other formats" id="oth-2411.14333" aria-labelledby="oth-2411.14333">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Generalized Finite Difference Method for Solving Stochastic Diffusion Equations </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Mojarrad,+F+N">Faezeh Nassajian Mojarrad</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 22 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span> </div> <p class='mathjax'> Stochastic diffusion equations are crucial for modeling a range of physical phenomena influenced by uncertainties. We introduce the generalized finite difference method for solving these equations. Then, we examine its consistency, stability and convergence in mean-square, showing that the proposed method preserves stability and demonstrates favorable convergence characteristics under suitable assumptions. 
In order to validate the methodology, we present numerical results in one-, two-, and three-dimensional space domains. </p> </div> </dd> <dt> <a name='item318'>[318]</a> <a href ="/abs/2411.14343" title="Abstract" id="2411.14343"> arXiv:2411.14343 </a> [<a href="/pdf/2411.14343" title="Download PDF" id="pdf-2411.14343" aria-labelledby="pdf-2411.14343">pdf</a>, <a href="https://arxiv.org/html/2411.14343v1" title="View HTML" id="html-2411.14343" aria-labelledby="html-2411.14343" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14343" title="Other formats" id="oth-2411.14343" aria-labelledby="oth-2411.14343">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> UnifiedCrawl: Aggregated Common Crawl for Affordable Adaptation of LLMs on Low-Resource Languages </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tessema,+B+M">Bethel Melesse Tessema</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Kedia,+A">Akhil Kedia</a> (2), <a href="https://arxiv.org/search/cs?searchtype=author&query=Chung,+T">Tae-Sun Chung</a> (1) ((1) Ajou University, (2) Independent Researcher)</div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Large language models (LLMs) under-perform on low-resource languages due to limited training data. We present a method to efficiently collect text data for low-resource languages from the entire Common Crawl corpus. Our approach, UnifiedCrawl, filters and extracts common crawl using minimal compute resources, yielding mono-lingual datasets much larger than previously available sources. 
We demonstrate that leveraging this data to fine-tuning multilingual LLMs via efficient adapter methods (QLoRA) significantly boosts performance on the low-resource language, while minimizing VRAM usage. Our experiments show large improvements in language modeling perplexity and an increase in few-shot prompting scores. Our work and released source code provide an affordable approach to improve LLMs for low-resource languages using consumer hardware. Our source code is available here at <a href="https://github.com/bethelmelesse/unifiedcrawl" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item319'>[319]</a> <a href ="/abs/2411.14344" title="Abstract" id="2411.14344"> arXiv:2411.14344 </a> [<a href="/pdf/2411.14344" title="Download PDF" id="pdf-2411.14344" aria-labelledby="pdf-2411.14344">pdf</a>, <a href="https://arxiv.org/html/2411.14344v1" title="View HTML" id="html-2411.14344" aria-labelledby="html-2411.14344" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14344" title="Other formats" id="oth-2411.14344" aria-labelledby="oth-2411.14344">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Overcomplete Tensor Decomposition via Koszul-Young Flattenings </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kothari,+P+K">Pravesh K. Kothari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Moitra,+A">Ankur Moitra</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wein,+A+S">Alexander S. 
Wein</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 42 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Data Structures and Algorithms (cs.DS)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Motivated by connections between algebraic complexity lower bounds and tensor decompositions, we investigate Koszul-Young flattenings, which are the main ingredient in recent lower bounds for matrix multiplication. Based on this tool we give a new algorithm for decomposing an $n_1 \times n_2 \times n_3$ tensor as the sum of a minimal number of rank-1 terms, and certifying uniqueness of this decomposition. For $n_1 \le n_2 \le n_3$ with $n_1 \to \infty$ and $n_3/n_2 = O(1)$, our algorithm is guaranteed to succeed when the tensor rank is bounded by $r \le (1-\epsilon)(n_2 + n_3)$ for an arbitrary $\epsilon > 0$, provided the tensor components are generically chosen. For any fixed $\epsilon$, the runtime is polynomial in $n_3$. When $n_2 = n_3 = n$, our condition on the rank gives a factor-of-2 improvement over the classical simultaneous diagonalization algorithm, which requires $r \le n$, and also improves on the recent algorithm of Koiran (2024) which requires $r \le 4n/3$. It also improves on the PhD thesis of Persu (2018) which solves rank detection for $r \leq 3n/2$. <br>We complement our upper bounds by showing limitations, in particular that no flattening of the style we consider can surpass rank $n_2 + n_3$. Furthermore, for $n \times n \times n$ tensors, we show that an even more general class of degree-$d$ polynomial flattenings cannot surpass rank $Cn$ for a constant $C = C(d)$. This suggests that for tensor decompositions, the case of generic components may be fundamentally harder than that of random components, where efficient decomposition is possible even in highly overcomplete settings. 
</p> </div> </dd> <dt> <a name='item320'>[320]</a> <a href ="/abs/2411.14345" title="Abstract" id="2411.14345"> arXiv:2411.14345 </a> [<a href="/pdf/2411.14345" title="Download PDF" id="pdf-2411.14345" aria-labelledby="pdf-2411.14345">pdf</a>, <a href="https://arxiv.org/html/2411.14345v1" title="View HTML" id="html-2411.14345" aria-labelledby="html-2411.14345" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14345" title="Other formats" id="oth-2411.14345" aria-labelledby="oth-2411.14345">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Layer Pruning with Consensus: A Triple-Win Solution </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Mugnaini,+L+G">Leandro Giusti Mugnaini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Duarte,+C+T">Carolina Tavares Duarte</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Costa,+A+H+R">Anna H. Reali Costa</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jordao,+A">Artur Jordao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Layer pruning offers a promising alternative to standard structured pruning, effectively reducing computational costs, latency, and memory footprint. While notable layer-pruning approaches aim to detect unimportant layers for removal, they often rely on single criteria that may not fully capture the complex, underlying properties of layers. We propose a novel approach that combines multiple similarity metrics into a single expressive measure of low-importance layers, called the Consensus criterion. Our technique delivers a triple-win solution: low accuracy drop, high-performance improvement, and increased robustness to adversarial attacks. 
With up to 78.80% FLOPs reduction and performance on par with state-of-the-art methods across different benchmarks, our approach reduces energy consumption and carbon emissions by up to 66.99% and 68.75%, respectively. Additionally, it avoids shortcut learning and improves robustness by up to 4 percentage points under various adversarial attacks. Overall, the Consensus criterion demonstrates its effectiveness in creating robust, efficient, and environmentally friendly pruned models. </p> </div> </dd> <dt> <a name='item321'>[321]</a> <a href ="/abs/2411.14346" title="Abstract" id="2411.14346"> arXiv:2411.14346 </a> [<a href="/pdf/2411.14346" title="Download PDF" id="pdf-2411.14346" aria-labelledby="pdf-2411.14346">pdf</a>, <a href="https://arxiv.org/html/2411.14346v1" title="View HTML" id="html-2411.14346" aria-labelledby="html-2411.14346" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14346" title="Other formats" id="oth-2411.14346" aria-labelledby="oth-2411.14346">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Lower Dimensional Spherical Representation of Medium Voltage Load Profiles for Visualization, Outlier Detection, and Generative Modelling </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Duque,+E+M+S">Edgar Mauricio Salazar Duque</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=van+der+Holst,+B">Bart van der Holst</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Vergara,+P+P">Pedro P. Vergara</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Giraldo,+J+S">Juan S. Giraldo</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Nguyen,+P+H">Phuong H. 
Nguyen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Van+der+Molen,+A">Anne Van der Molen</a>, Han (J.G.) <a href="https://arxiv.org/search/eess?searchtype=author&query=Slootweg">Slootweg</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> This paper presents the spherical lower dimensional representation for daily medium voltage load profiles, based on principal component analysis. The objective is to unify and simplify the tasks for (i) clustering visualisation, (ii) outlier detection and (iii) generative profile modelling under one concept. The lower dimensional projection of standardised load profiles unveils a latent distribution in a three-dimensional sphere. This spherical structure allows us to detect outliers by fitting probability distribution models in the spherical coordinate system, identifying measurements that deviate from the spherical shape. The same latent distribution exhibits an arc shape, suggesting an underlying order among load profiles. We develop a principal curve technique to uncover this order based on similarity, offering new advantages over conventional clustering techniques. This finding reveals that energy consumption in a wide region can be seen as a continuously changing process. Furthermore, we combined the principal curve with a von Mises-Fisher distribution to create a model capable of generating profiles with continuous mixtures between clusters. The presence of the spherical distribution is validated with data from four municipalities in the Netherlands. The uncovered spherical structure implies the possibility of employing new mathematical tools from directional statistics and differential geometry for load profile modelling. 
</p> </div> </dd> <dt> <a name='item322'>[322]</a> <a href ="/abs/2411.14347" title="Abstract" id="2411.14347"> arXiv:2411.14347 </a> [<a href="/pdf/2411.14347" title="Download PDF" id="pdf-2411.14347" aria-labelledby="pdf-2411.14347">pdf</a>, <a href="https://arxiv.org/html/2411.14347v1" title="View HTML" id="html-2411.14347" aria-labelledby="html-2411.14347" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14347" title="Other formats" id="oth-2411.14347" aria-labelledby="oth-2411.14347">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DINO-X: A Unified Vision Model for Open-World Object Detection and Understanding </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ren,+T">Tianhe Ren</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yihao Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+Q">Qing Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+Z">Zhaoyang Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiong,+Y">Yuda Xiong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+W">Wenlong Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+Z">Zhengyu Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shen,+J">Junyi Shen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+Y">Yuan Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+X">Xiaoke Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+X">Xingyu Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+Z">Zhuheng Song</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yuhong Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+H">Hongjie Huang</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+H">Han Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+S">Shilong Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+H">Hao Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+F">Feng Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+K">Kent Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+L">Lei Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Technical Report </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> In this paper, we introduce DINO-X, which is a unified object-centric vision model developed by IDEA Research with the best open-world object detection performance to date. DINO-X employs the same Transformer-based encoder-decoder architecture as Grounding DINO 1.5 to pursue an object-level representation for open-world object understanding. To make long-tailed object detection easy, DINO-X extends its input options to support text prompt, visual prompt, and customized prompt. With such flexible prompt options, we develop a universal object prompt to support prompt-free open-world detection, making it possible to detect anything in an image without requiring users to provide any prompt. To enhance the model's core grounding capability, we have constructed a large-scale dataset with over 100 million high-quality grounding samples, referred to as Grounding-100M, for advancing the model's open-vocabulary detection performance. 
Pre-training on such a large-scale grounding dataset leads to a foundational object-level representation, which enables DINO-X to integrate multiple perception heads to simultaneously support multiple object perception and understanding tasks, including detection, segmentation, pose estimation, object captioning, object-based QA, etc. Experimental results demonstrate the superior performance of DINO-X. Specifically, the DINO-X Pro model achieves 56.0 AP, 59.8 AP, and 52.4 AP on the COCO, LVIS-minival, and LVIS-val zero-shot object detection benchmarks, respectively. Notably, it scores 63.3 AP and 56.5 AP on the rare classes of LVIS-minival and LVIS-val benchmarks, both improving the previous SOTA performance by 5.8 AP. Such a result underscores its significantly improved capacity for recognizing long-tailed objects. </p> </div> </dd> <dt> <a name='item323'>[323]</a> <a href ="/abs/2411.14349" title="Abstract" id="2411.14349"> arXiv:2411.14349 </a> [<a href="/pdf/2411.14349" title="Download PDF" id="pdf-2411.14349" aria-labelledby="pdf-2411.14349">pdf</a>, <a href="https://arxiv.org/html/2411.14349v1" title="View HTML" id="html-2411.14349" aria-labelledby="html-2411.14349" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14349" title="Other formats" id="oth-2411.14349" aria-labelledby="oth-2411.14349">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Agnostic Learning of Arbitrary ReLU Activation under Gaussian Marginals </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+A">Anxin Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vijayaraghavan,+A">Aravindan Vijayaraghavan</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Data Structures and Algorithms (cs.DS) </div> <p class='mathjax'> We consider the problem of learning an 
arbitrarily-biased ReLU activation (or neuron) over Gaussian marginals with the squared loss objective. Despite the ReLU neuron being the basic building block of modern neural networks, we still do not understand the basic algorithmic question of whether one arbitrary ReLU neuron is learnable in the non-realizable setting. In particular, all existing polynomial time algorithms only provide approximation guarantees for the better-behaved unbiased setting or restricted bias setting. <br>Our main result is a polynomial time statistical query (SQ) algorithm that gives the first constant factor approximation for arbitrary bias. It outputs a ReLU activation that achieves a loss of $O(\mathrm{OPT}) + \varepsilon$ in time $\mathrm{poly}(d,1/\varepsilon)$, where $\mathrm{OPT}$ is the loss obtained by the optimal ReLU activation. Our algorithm presents an interesting departure from existing algorithms, which are all based on gradient descent and thus fall within the class of correlational statistical query (CSQ) algorithms. We complement our algorithmic result by showing that no polynomial time CSQ algorithm can achieve a constant factor approximation. Together, these results shed light on the intrinsic limitation of gradient descent, while identifying arguably the simplest setting (a single neuron) where there is a separation between SQ and CSQ algorithms. 
</p> </div> </dd> <dt> <a name='item324'>[324]</a> <a href ="/abs/2411.14354" title="Abstract" id="2411.14354"> arXiv:2411.14354 </a> [<a href="/pdf/2411.14354" title="Download PDF" id="pdf-2411.14354" aria-labelledby="pdf-2411.14354">pdf</a>, <a href="https://arxiv.org/html/2411.14354v1" title="View HTML" id="html-2411.14354" aria-labelledby="html-2411.14354" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14354" title="Other formats" id="oth-2411.14354" aria-labelledby="oth-2411.14354">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Contrasting local and global modeling with machine learning and satellite data: A case study estimating tree canopy height in African savannas </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Rolf,+E">Esther Rolf</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gordon,+L">Lucia Gordon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tambe,+M">Milind Tambe</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Davies,+A">Andrew Davies</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 31 pages; 9 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> While advances in machine learning with satellite imagery (SatML) are facilitating environmental monitoring at a global scale, developing SatML models that are accurate and useful for local regions remains critical to understanding and acting on an ever-changing planet. 
As increasing attention and resources are being devoted to training SatML models with global data, it is important to understand when improvements in global models will make it easier to train or fine-tune models that are accurate in specific regions. To explore this question, we contrast local and global training paradigms for SatML through a case study of tree canopy height (TCH) mapping in the Karingani Game Reserve, Mozambique. We find that recent advances in global TCH mapping do not necessarily translate to better local modeling abilities in our study region. Specifically, small models trained only with locally-collected data outperform published global TCH maps, and even outperform globally pretrained models that we fine-tune using local data. Analyzing these results further, we identify specific points of conflict and synergy between local and global modeling paradigms that can inform future research toward aligning local and global performance objectives in geospatial machine learning. 
</p> </div> </dd> <dt> <a name='item325'>[325]</a> <a href ="/abs/2411.14356" title="Abstract" id="2411.14356"> arXiv:2411.14356 </a> [<a href="/pdf/2411.14356" title="Download PDF" id="pdf-2411.14356" aria-labelledby="pdf-2411.14356">pdf</a>, <a href="https://arxiv.org/html/2411.14356v1" title="View HTML" id="html-2411.14356" aria-labelledby="html-2411.14356" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14356" title="Other formats" id="oth-2411.14356" aria-labelledby="oth-2411.14356">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Convex Approximation of Probabilistic Reachable Sets from Small Samples Using Self-supervised Neural Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xiang,+J">Jun Xiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jun Chen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Probabilistic Reachable Set (PRS) plays a crucial role in many fields of autonomous systems, yet efficiently generating PRS remains a significant challenge. This paper presents a learning approach to generating 2-dimensional PRS for states in a dynamic system. Traditional methods such as Hamilton-Jacobi reachability analysis, Monte Carlo, and Gaussian process classification face significant computational challenges or require detailed dynamics information, limiting their applicability in realistic situations. Existing data-driven methods may lack accuracy. To overcome these limitations, we propose leveraging neural networks, commonly used in imitation learning and computer vision, to imitate expert methods to generate PRS approximations. 
We trained the neural networks using a multi-label, self-supervised learning approach. We selected the fine-tuned convex approximation method as the expert to create expert PRS. Additionally, we continued sampling from the distribution to obtain a diverse array of sample sets. Given a small sample set, the trained neural networks can replicate the PRS approximation generated by the expert method, while the generation speed is much faster. </p> </div> </dd> <dt> <a name='item326'>[326]</a> <a href ="/abs/2411.14358" title="Abstract" id="2411.14358"> arXiv:2411.14358 </a> [<a href="/pdf/2411.14358" title="Download PDF" id="pdf-2411.14358" aria-labelledby="pdf-2411.14358">pdf</a>, <a href="https://arxiv.org/html/2411.14358v1" title="View HTML" id="html-2411.14358" aria-labelledby="html-2411.14358" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14358" title="Other formats" id="oth-2411.14358" aria-labelledby="oth-2411.14358">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> InCrowd-VI: A Realistic Visual-Inertial Dataset for Evaluating SLAM in Indoor Pedestrian-Rich Spaces for Human Navigation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bamdad,+M">Marziyeh Bamdad</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hutter,+H">Hans-Peter Hutter</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Darvishy,+A">Alireza Darvishy</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages, 7 figures, 5 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Simultaneous localization and mapping (SLAM) techniques can be used to navigate the visually impaired, but the development of robust SLAM solutions for crowded spaces 
is limited by the lack of realistic datasets. To address this, we introduce InCrowd-VI, a novel visual-inertial dataset specifically designed for human navigation in indoor pedestrian-rich environments. Recorded using Meta Aria Project glasses, it captures realistic scenarios without environmental control. InCrowd-VI features 58 sequences totaling a 5 km trajectory length and 1.5 hours of recording time, including RGB, stereo images, and IMU measurements. The dataset captures important challenges such as pedestrian occlusions, varying crowd densities, complex layouts, and lighting changes. Ground-truth trajectories, accurate to approximately 2 cm, are provided in the dataset, originating from the Meta Aria project machine perception SLAM service. In addition, a semi-dense 3D point cloud of scenes is provided for each sequence. The evaluation of state-of-the-art visual odometry (VO) and SLAM algorithms on InCrowd-VI revealed severe performance limitations in these realistic scenarios, demonstrating the need and value of the new dataset to advance SLAM research for visually impaired navigation in complex indoor environments. 
</p> </div> </dd> <dt> <a name='item327'>[327]</a> <a href ="/abs/2411.14361" title="Abstract" id="2411.14361"> arXiv:2411.14361 </a> [<a href="/pdf/2411.14361" title="Download PDF" id="pdf-2411.14361" aria-labelledby="pdf-2411.14361">pdf</a>, <a href="https://arxiv.org/html/2411.14361v1" title="View HTML" id="html-2411.14361" aria-labelledby="html-2411.14361" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14361" title="Other formats" id="oth-2411.14361" aria-labelledby="oth-2411.14361">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Improved Lower Bounds for all Odd-Query Locally Decodable Codes </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Basu,+A">Arpon Basu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hsieh,+J">Jun-Ting Hsieh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kothari,+P+K">Pravesh K. Kothari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+A+D">Andrew D. Lin</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computational Complexity (cs.CC)</span>; Combinatorics (math.CO) </div> <p class='mathjax'> We prove that for every odd $q\geq 3$, any $q$-query binary, possibly non-linear locally decodable code ($q$-LDC) $E:\{\pm1\}^k \rightarrow \{\pm1\}^n$ must satisfy $k \leq \tilde{O}(n^{1-2/q})$. For even $q$, this bound was established in a sequence of prior works. For $q=3$, the above bound was achieved in a recent work of Alrabiah, Guruswami, Kothari and Manohar using an argument that crucially exploits known exponential lower bounds for $2$-LDCs. Their strategy hits an inherent bottleneck for $q \geq 5$. <br>Our key insight is identifying a general sufficient condition on the hypergraph of local decoding sets called $t$-approximate strong regularity. 
This condition demands that 1) the number of hyperedges containing any given subset of vertices of size $t$ (i.e., its co-degree) be equal to the same but arbitrary value $d_t$ up to a multiplicative constant slack, and 2) all other co-degrees be upper-bounded relative to $d_t$. This condition significantly generalizes related proposals in prior works that demand absolute upper bounds on all co-degrees. <br>We give an argument based on spectral bounds on Kikuchi Matrices that lower bounds the blocklength of any LDC whose local decoding sets satisfy $t$-approximate strong regularity for any $t \leq q$. Crucially, unlike prior works, our argument works despite having no non-trivial absolute upper bound on the co-degrees of any set of vertices. To apply our argument to arbitrary $q$-LDCs, we give a new, greedy, approximate strong regularity decomposition that shows that arbitrary, dense enough hypergraphs can be partitioned (up to a small error) into approximately strongly regular pieces satisfying the required relative bounds on the co-degrees. 
</p> </div> </dd> <dt> <a name='item328'>[328]</a> <a href ="/abs/2411.14365" title="Abstract" id="2411.14365"> arXiv:2411.14365 </a> [<a href="/pdf/2411.14365" title="Download PDF" id="pdf-2411.14365" aria-labelledby="pdf-2411.14365">pdf</a>, <a href="/format/2411.14365" title="Other formats" id="oth-2411.14365" aria-labelledby="oth-2411.14365">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Formal Simulation and Visualisation of Hybrid Programs </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Mendes,+P">Pedro Mendes</a> (University of Minho, Portugal), <a href="https://arxiv.org/search/eess?searchtype=author&query=Correia,+R">Ricardo Correia</a> (University of Minho, Portugal), <a href="https://arxiv.org/search/eess?searchtype=author&query=Neves,+R">Renato Neves</a> (INESC-TEC &amp; University of Minho, Portugal), <a href="https://arxiv.org/search/eess?searchtype=author&query=Proen%C3%A7a,+J">José Proença</a> (CISTER, Faculty of Sciences of the University of Porto, Portugal)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 20-37 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span>; Programming Languages (cs.PL) </div> <p class='mathjax'> The design and analysis of systems that combine computational behaviour with physical processes' continuous dynamics - such as movement, velocity, and voltage - is a famous, challenging task. 
Several theoretical results from programming theory emerged in the last decades to tackle the issue; some of which are the basis of a proof-of-concept tool, called Lince, that aids in the analysis of such systems, by presenting simulations of their respective behaviours. <br>However, being a proof-of-concept, the tool is quite limited with respect to usability, and when attempting to apply it to a set of common, concrete problems, involving autonomous driving and others, it either simply cannot simulate them or fails to provide a satisfactory user-experience. <br>The current work complements the aforementioned theoretical approaches with a more practical perspective, by improving Lince along several dimensions: to name a few, richer syntactic constructs, more operations, more informative plotting systems and error messages, and a better performance overall. We illustrate our improvements via a variety of examples that involve both autonomous driving and electrical systems. </p> </div> </dd> <dt> <a name='item329'>[329]</a> <a href ="/abs/2411.14367" title="Abstract" id="2411.14367"> arXiv:2411.14367 </a> [<a href="/pdf/2411.14367" title="Download PDF" id="pdf-2411.14367" aria-labelledby="pdf-2411.14367">pdf</a>, <a href="/format/2411.14367" title="Other formats" id="oth-2411.14367" aria-labelledby="oth-2411.14367">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ROSMonitoring 2.0: Extending ROS Runtime Verification to Services and Ordered Topics </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Saadat,+M+G">Maryam Ghaffari Saadat</a> (University of Manchester), <a href="https://arxiv.org/search/cs?searchtype=author&query=Ferrando,+A">Angelo Ferrando</a> (University of Modena and Reggio Emilia), <a href="https://arxiv.org/search/cs?searchtype=author&query=Dennis,+L+A">Louise A. 
Dennis</a> (University of Manchester), <a href="https://arxiv.org/search/cs?searchtype=author&query=Fisher,+M">Michael Fisher</a> (University of Manchester)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 38-55 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span>; Artificial Intelligence (cs.AI); Robotics (cs.RO) </div> <p class='mathjax'> Formal verification of robotic applications presents challenges due to their hybrid nature and distributed architecture. This paper introduces ROSMonitoring 2.0, an extension of ROSMonitoring designed to facilitate the monitoring of both topics and services while considering the order in which messages are published and received. The framework has been enhanced to support these novel features for ROS1 -- and partially ROS2 environments -- offering improved real-time support, security, scalability, and interoperability. We discuss the modifications made to accommodate these advancements and present results obtained from a case study involving the runtime monitoring of specific components of a fire-fighting Uncrewed Aerial Vehicle (UAV). </p> </div> </dd> <dt> <a name='item330'>[330]</a> <a href ="/abs/2411.14368" title="Abstract" id="2411.14368"> arXiv:2411.14368 </a> [<a href="/pdf/2411.14368" title="Download PDF" id="pdf-2411.14368" aria-labelledby="pdf-2411.14368">pdf</a>, <a href="/format/2411.14368" title="Other formats" id="oth-2411.14368" aria-labelledby="oth-2411.14368">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> RV4Chatbot: Are Chatbots Allowed to Dream of Electric Sheep? 
</div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gatti,+A">Andrea Gatti</a> (University of Genoa), <a href="https://arxiv.org/search/cs?searchtype=author&query=Mascardi,+V">Viviana Mascardi</a> (University of Genoa), <a href="https://arxiv.org/search/cs?searchtype=author&query=Ferrando,+A">Angelo Ferrando</a> (University of Modena and Reggio Emilia)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 73-90 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Human-Computer Interaction (cs.HC); Software Engineering (cs.SE) </div> <p class='mathjax'> Chatbots have become integral to various application domains, including those with safety-critical considerations. As a result, there is a pressing need for methods that ensure chatbots consistently adhere to expected, safe behaviours. In this paper, we introduce RV4Chatbot, a Runtime Verification framework designed to monitor deviations in chatbot behaviour. We formalise expected behaviours as interaction protocols between the user and the chatbot. We present the RV4Chatbot design and describe two implementations that instantiate it: RV4Rasa, for monitoring chatbots created with the Rasa framework, and RV4Dialogflow, for monitoring Dialogflow chatbots. Additionally, we detail experiments conducted in a factory automation scenario using both RV4Rasa and RV4Dialogflow. 
</p> </div> </dd> <dt> <a name='item331'>[331]</a> <a href ="/abs/2411.14369" title="Abstract" id="2411.14369"> arXiv:2411.14369 </a> [<a href="/pdf/2411.14369" title="Download PDF" id="pdf-2411.14369" aria-labelledby="pdf-2411.14369">pdf</a>, <a href="/format/2411.14369" title="Other formats" id="oth-2411.14369" aria-labelledby="oth-2411.14369">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Model Checking and Verification of Synchronisation Properties of Cobot Welding </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Murray,+Y">Yvonne Murray</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nordlie,+H">Henrik Nordlie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Anisi,+D+A">David A. Anisi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ribeiro,+P">Pedro Ribeiro</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cavalcanti,+A">Ana Cavalcanti</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 91-108 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Multiagent Systems (cs.MA); Software Engineering (cs.SE) </div> <p class='mathjax'> This paper describes use of model checking to verify synchronisation properties of an industrial welding system consisting of a cobot arm and an external turntable. The robots must move synchronously, but sometimes get out of synchronisation, giving rise to unsatisfactory weld qualities in problem areas, such as around corners. 
These mistakes are costly, since time is lost both in the robotic welding and in manual repairs needed to improve the weld. Verification of the synchronisation properties has shown that they are fulfilled as long as assumptions of correctness made about parts outside the scope of the model hold, indicating limitations in the hardware. These results have indicated the source of the problem, and motivated a re-calibration of the real-life system. This has drastically improved the welding results, and is a demonstration of how formal methods can be useful in an industrial setting. </p> </div> </dd> <dt> <a name='item332'>[332]</a> <a href ="/abs/2411.14371" title="Abstract" id="2411.14371"> arXiv:2411.14371 </a> [<a href="/pdf/2411.14371" title="Download PDF" id="pdf-2411.14371" aria-labelledby="pdf-2411.14371">pdf</a>, <a href="/format/2411.14371" title="Other formats" id="oth-2411.14371" aria-labelledby="oth-2411.14371">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Synthesising Robust Controllers for Robot Collectives with Recurrent Tasks: A Case Study </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Schnittka,+T">Till Schnittka</a> (University of Bremen), <a href="https://arxiv.org/search/cs?searchtype=author&query=Gleirscher,+M">Mario Gleirscher</a> (University of Bremen)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 
109-125 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Multiagent Systems (cs.MA)</span>; Artificial Intelligence (cs.AI); Robotics (cs.RO) </div> <p class='mathjax'> When designing correct-by-construction controllers for autonomous collectives, three key challenges are the task specification, the modelling, and its use at practical scale. In this paper, we focus on a simple yet useful abstraction for high-level controller synthesis for robot collectives with optimisation goals (e.g., maximum cleanliness, minimum energy consumption) and recurrence (e.g., re-establish contamination and charge thresholds) and safety (e.g., avoid full discharge, mutually exclusive room occupation) constraints. Due to technical limitations (related to scalability and using constraints in the synthesis), we simplify our graph-based setting from a stochastic two-player game into a single-player game on a partially observable Markov decision process (POMDP). Robustness against environmental uncertainty is encoded via partial observability. Linear-time correctness properties are verified separately after synthesising the POMDP strategy. We contribute at-scale guidance on POMDP modelling and controller synthesis for tasked robot collectives exemplified by the scenario of battery-driven robots responsible for cleaning public buildings with utilisation constraints. 
</p> </div> </dd> <dt> <a name='item333'>[333]</a> <a href ="/abs/2411.14372" title="Abstract" id="2411.14372"> arXiv:2411.14372 </a> [<a href="/pdf/2411.14372" title="Download PDF" id="pdf-2411.14372" aria-labelledby="pdf-2411.14372">pdf</a>, <a href="/format/2411.14372" title="Other formats" id="oth-2411.14372" aria-labelledby="oth-2411.14372">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Case Study on Numerical Analysis of a Path Computation Algorithm </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Boussu,+G">Grégoire Boussu</a> (Thales Research and Technology), <a href="https://arxiv.org/search/cs?searchtype=author&query=Kosmatov,+N">Nikolai Kosmatov</a> (Thales Research and Technology), <a href="https://arxiv.org/search/cs?searchtype=author&query=V%C3%A9drine,+F">Franck Védrine</a> (Université Paris-Saclay, CEA, List)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 126-142 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span> </div> <p class='mathjax'> Lack of numerical precision in control software -- in particular, related to trajectory computation -- can lead to incorrect results with costly or even catastrophic consequences. Various tools have been proposed to analyze the precision of program computations. This paper presents a case study on numerical analysis of an industrial implementation of the fast marching algorithm, a popular path computation algorithm frequently used for trajectory computation. 
We briefly describe the selected tools, present the applied methodology, highlight some attention points, summarize the results and outline future work directions. </p> </div> </dd> <dt> <a name='item334'>[334]</a> <a href ="/abs/2411.14373" title="Abstract" id="2411.14373"> arXiv:2411.14373 </a> [<a href="/pdf/2411.14373" title="Download PDF" id="pdf-2411.14373" aria-labelledby="pdf-2411.14373">pdf</a>, <a href="/format/2411.14373" title="Other formats" id="oth-2411.14373" aria-labelledby="oth-2411.14373">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Cross--layer Formal Verification of Robotic Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ra%C3%AFs,+S">Sylvain Raïs</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Brunel,+J">Julien Brunel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Doose,+D">David Doose</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Herbreteau,+F">Frédéric Herbreteau</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 143-150 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Robotic systems are widely used to interact with humans or to perform critical tasks. As a result, it is imperative to provide guarantees about their behavior. Due to the modularity and complexity of robotic systems, their design and verification are often divided into several layers. However, some system properties can only be investigated by considering multiple layers simultaneously. 
We propose a cross-layer verification method to verify the expected properties of concrete robotic systems. Our method verifies one layer using abstractions of other layers. We propose two approaches: refining the models of the abstract layers and refining the property under verification. A combination of these two approaches seems to be the most promising to ensure model genericity and to avoid the state-space explosion problem. </p> </div> </dd> <dt> <a name='item335'>[335]</a> <a href ="/abs/2411.14374" title="Abstract" id="2411.14374"> arXiv:2411.14374 </a> [<a href="/pdf/2411.14374" title="Download PDF" id="pdf-2411.14374" aria-labelledby="pdf-2411.14374">pdf</a>, <a href="/format/2411.14374" title="Other formats" id="oth-2411.14374" aria-labelledby="oth-2411.14374">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Using Formal Models, Safety Shields and Certified Control to Validate AI-Based Train Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gruteser,+J">Jan Gruteser</a> (Heinrich Heine University Düsseldorf), <a href="https://arxiv.org/search/cs?searchtype=author&query=Ro%C3%9Fbach,+J">Jan Roßbach</a> (Heinrich Heine University Düsseldorf), <a href="https://arxiv.org/search/cs?searchtype=author&query=Vu,+F">Fabian Vu</a> (Heinrich Heine University Düsseldorf), <a href="https://arxiv.org/search/cs?searchtype=author&query=Leuschel,+M">Michael Leuschel</a> (Heinrich Heine University Düsseldorf)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 
151-159 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Logic in Computer Science (cs.LO)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> The certification of autonomous systems is an important concern in science and industry. The KI-LOK project explores new methods for certifying and safely integrating AI components into autonomous trains. We pursued a two-layered approach: (1) ensuring the safety of the steering system by formal analysis using the B method, and (2) improving the reliability of the perception system with a runtime certificate checker. This work links both strategies within a demonstrator that runs simulations on the formal model, controlled by the real AI output and the real certificate checker. The demonstrator is integrated into the validation tool ProB. This enables runtime monitoring, runtime verification, and statistical validation of formal safety properties using a formal B model. Consequently, one can detect and analyse potential vulnerabilities and weaknesses of the AI and the certificate checker. We apply these techniques to a signal detection case study and present our findings. </p> </div> </dd> <dt> <a name='item336'>[336]</a> <a href ="/abs/2411.14375" title="Abstract" id="2411.14375"> arXiv:2411.14375 </a> [<a href="/pdf/2411.14375" title="Download PDF" id="pdf-2411.14375" aria-labelledby="pdf-2411.14375">pdf</a>, <a href="/format/2411.14375" title="Other formats" id="oth-2411.14375" aria-labelledby="oth-2411.14375">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Model Checking for Reinforcement Learning in Autonomous Driving: One Can Do More Than You Think! 
</div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gu,+R">Rong Gu</a> (Mälardalen University)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In Proceedings FMAS2024, <a href="https://arxiv.org/abs/2411.13215" data-arxiv-id="2411.13215" class="link-https">arXiv:2411.13215</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> EPTCS 411, 2024, pp. 160-177 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Logic in Computer Science (cs.LO) </div> <p class='mathjax'> Most reinforcement learning (RL) platforms use high-level programming languages, such as OpenAI Gymnasium using Python. These frameworks provide various API and benchmarks for testing RL algorithms in different domains, such as autonomous driving (AD) and robotics. These platforms often emphasise the design of RL algorithms and the training performance but neglect the correctness of models and reward functions, which can be crucial for the successful application of RL. This paper proposes using formal methods to model AD systems and demonstrates how model checking (MC) can be used in RL for AD. Most studies combining MC and RL focus on safety, such as safety shields. However, this paper shows different facets where MC can strengthen RL. First, an MC-based model pre-analysis can reveal bugs with respect to sensor accuracy and learning step size. This step serves as a preparation of RL, which saves time if bugs exist and deepens users' understanding of the target system. Second, reward automata can benefit the design of reward functions and greatly improve learning performance especially when the learning objectives are multiple. All these findings are supported by experiments. 
</p> </div> </dd> <dt> <a name='item337'>[337]</a> <a href ="/abs/2411.14381" title="Abstract" id="2411.14381"> arXiv:2411.14381 </a> [<a href="/pdf/2411.14381" title="Download PDF" id="pdf-2411.14381" aria-labelledby="pdf-2411.14381">pdf</a>, <a href="https://arxiv.org/html/2411.14381v1" title="View HTML" id="html-2411.14381" aria-labelledby="html-2411.14381" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14381" title="Other formats" id="oth-2411.14381" aria-labelledby="oth-2411.14381">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ETA-IK: Execution-Time-Aware Inverse Kinematics for Dual-Arm Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+Y">Yucheng Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+X">Xi Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yongzhou Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+T">Tao Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mamaev,+I">Ilshat Mamaev</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hein,+B">Björn Hein</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> This paper presents ETA-IK, a novel Execution-Time-Aware Inverse Kinematics method tailored for dual-arm robotic systems. The primary goal is to optimize motion execution time by leveraging the redundancy of both arms, specifically in tasks where only the relative pose of the robots is constrained, such as dual-arm scanning of unknown objects. 
Unlike traditional inverse kinematics methods that use surrogate metrics such as joint configuration distance, our method incorporates direct motion execution time and implicit collisions into the optimization process, thereby finding target joints that allow subsequent trajectory generation to get more efficient and collision-free motion. A neural network based execution time approximator is employed to predict time-efficient joint configurations while accounting for potential collisions. Through experimental evaluation on a system composed of a UR5 and a KUKA iiwa robot, we demonstrate significant reductions in execution time. The proposed method outperforms conventional approaches, showing improved motion efficiency without sacrificing positioning accuracy. These results highlight the potential of ETA-IK to improve the performance of dual-arm systems in applications, where efficiency and safety are paramount. </p> </div> </dd> <dt> <a name='item338'>[338]</a> <a href ="/abs/2411.14384" title="Abstract" id="2411.14384"> arXiv:2411.14384 </a> [<a href="/pdf/2411.14384" title="Download PDF" id="pdf-2411.14384" aria-labelledby="pdf-2411.14384">pdf</a>, <a href="https://arxiv.org/html/2411.14384v1" title="View HTML" id="html-2411.14384" aria-labelledby="html-2411.14384" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14384" title="Other formats" id="oth-2411.14384" aria-labelledby="oth-2411.14384">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Baking Gaussian Splatting into Diffusion Denoiser for Fast and Scalable Single-stage Image-to-3D Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cai,+Y">Yuanhao Cai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+H">He Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+K">Kai Zhang</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+Y">Yixun Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ren,+M">Mengwei Ren</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luan,+F">Fujun Luan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Q">Qing Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+S+Y">Soo Ye Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+J">Jianming Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Z">Zhifei Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+Y">Yuqian Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Z">Zhe Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yuille,+A">Alan Yuille</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> A novel one-stage 3DGS-based diffusion generates objects and scenes from a single view in ~6 seconds </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Graphics (cs.GR) </div> <p class='mathjax'> Existing feed-forward image-to-3D methods mainly rely on 2D multi-view diffusion models that cannot guarantee 3D consistency. These methods easily collapse when changing the prompt view direction and mainly handle object-centric prompt images. In this paper, we propose a novel single-stage 3D diffusion model, DiffusionGS, for object and scene generation from a single view. DiffusionGS directly outputs 3D Gaussian point clouds at each timestep to enforce view consistency and allow the model to generate robustly given prompt views of any directions, beyond object-centric inputs. Plus, to improve the capability and generalization ability of DiffusionGS, we scale up 3D training data by developing a scene-object mixed training strategy. 
Experiments show that our method enjoys better generation quality (2.20 dB higher in PSNR and 23.25 lower in FID) and over 5x faster speed (~6s on an A100 GPU) than SOTA methods. The user study and text-to-3D applications also reveals the practical values of our method. Our Project page at <a href="https://caiyuanhao1998.github.io/project/DiffusionGS/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> shows the video and interactive generation results. </p> </div> </dd> <dt> <a name='item339'>[339]</a> <a href ="/abs/2411.14386" title="Abstract" id="2411.14386"> arXiv:2411.14386 </a> [<a href="/pdf/2411.14386" title="Download PDF" id="pdf-2411.14386" aria-labelledby="pdf-2411.14386">pdf</a>, <a href="https://arxiv.org/html/2411.14386v1" title="View HTML" id="html-2411.14386" aria-labelledby="html-2411.14386" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14386" title="Other formats" id="oth-2411.14386" aria-labelledby="oth-2411.14386">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning Humanoid Locomotion with Perceptive Internal Model </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Long,+J">Junfeng Long</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ren,+J">Junli Ren</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+M">Moji Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zirui Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+T">Tao Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+P">Ping Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pang,+J">Jiangmiao Pang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> submitted to ICRA2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> 
<span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> In contrast to quadruped robots that can navigate diverse terrains using a "blind" policy, humanoid robots require accurate perception for stable locomotion due to their high degrees of freedom and inherently unstable morphology. However, incorporating perceptual signals often introduces additional disturbances to the system, potentially reducing its robustness, generalizability, and efficiency. This paper presents the Perceptive Internal Model (PIM), which relies on onboard, continuously updated elevation maps centered around the robot to perceive its surroundings. We train the policy using ground-truth obstacle heights surrounding the robot in simulation, optimizing it based on the Hybrid Internal Model (HIM), and perform inference with heights sampled from the constructed elevation map. Unlike previous methods that directly encode depth maps or raw point clouds, our approach allows the robot to perceive the terrain beneath its feet clearly and is less affected by camera movement or noise. Furthermore, since depth map rendering is not required in simulation, our method introduces minimal additional computational costs and can train the policy in 3 hours on an RTX 4090 GPU. We verify the effectiveness of our method across various humanoid robots, various indoor and outdoor terrains, stairs, and various sensor configurations. Our method can enable a humanoid robot to continuously climb stairs and has the potential to serve as a foundational algorithm for the development of future humanoid control methods. 
</p> </div> </dd> <dt> <a name='item340'>[340]</a> <a href ="/abs/2411.14387" title="Abstract" id="2411.14387"> arXiv:2411.14387 </a> [<a href="/pdf/2411.14387" title="Download PDF" id="pdf-2411.14387" aria-labelledby="pdf-2411.14387">pdf</a>, <a href="/format/2411.14387" title="Other formats" id="oth-2411.14387" aria-labelledby="oth-2411.14387">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hardness Amplification for Dynamic Binary Search Trees </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+S">Shunhua Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lecomte,+V">Victor Lecomte</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Weinstein,+O">Omri Weinstein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yingchareonthawornchai,+S">Sorrachai Yingchareonthawornchai</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Data Structures and Algorithms (cs.DS)</span> </div> <p class='mathjax'> We prove direct-sum theorems for Wilber's two lower bounds [Wilber, FOCS'86] on the cost of access sequences in the binary search tree (BST) model. These bounds are central to the question of dynamic optimality [Sleator and Tarjan, JACM'85]: the Alternation bound is the only bound to have yielded online BST algorithms beating $\log n$ competitive ratio, while the Funnel bound has repeatedly been conjectured to exactly characterize the cost of executing an access sequence using the optimal tree [Wilber, FOCS'86, Kozma'16], and has been explicitly linked to splay trees [Levy and Tarjan, SODA'19]. Previously, the direct-sum theorem for the Alternation bound was known only when approximation was allowed [Chalermsook, Chuzhoy and Saranurak, APPROX'20, ToC'24]. 
<br>We use these direct-sum theorems to amplify the sequences from [Lecomte and Weinstein, ESA'20] that separate between Wilber's Alternation and Funnel bounds, increasing the Alternation and Funnel bounds while optimally maintaining the separation. As a corollary, we show that Tango trees [Demaine et al., FOCS'04] are optimal among any BST algorithms that charge their costs to the Alternation bound. This is true for any value of the Alternation bound, even values for which Tango trees achieve a competitive ratio of $o(\log \log n)$ instead of the default $O(\log \log n)$. Previously, the optimality of Tango trees was shown only for a limited range of Alternation bound [Lecomte and Weinstein, ESA'20]. </p> </div> </dd> <dt> <a name='item341'>[341]</a> <a href ="/abs/2411.14393" title="Abstract" id="2411.14393"> arXiv:2411.14393 </a> [<a href="/pdf/2411.14393" title="Download PDF" id="pdf-2411.14393" aria-labelledby="pdf-2411.14393">pdf</a>, <a href="/format/2411.14393" title="Other formats" id="oth-2411.14393" aria-labelledby="oth-2411.14393">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> POS-tagging to highlight the skeletal structure of sentences </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Churakov,+G">Grigorii Churakov</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> in Russian language. 
Conference: Automated control systems and information technologies <a href="https://asuit.pstu.ru/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> Section: IT and automated systems </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> This study presents the development of a part-of-speech (POS) tagging model to extract the skeletal structure of sentences using transfer learning with the BERT architecture for token classification. The model, fine-tuned on Russian text, demonstrating its effectiveness. The approach offers potential applications in enhancing natural language processing tasks, such as improving machine translation. <br>Keywords: part of speech tagging, morphological analysis, natural language processing, BERT. </p> </div> </dd> <dt> <a name='item342'>[342]</a> <a href ="/abs/2411.14394" title="Abstract" id="2411.14394"> arXiv:2411.14394 </a> [<a href="/pdf/2411.14394" title="Download PDF" id="pdf-2411.14394" aria-labelledby="pdf-2411.14394">pdf</a>, <a href="https://arxiv.org/html/2411.14394v1" title="View HTML" id="html-2411.14394" aria-labelledby="html-2411.14394" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14394" title="Other formats" id="oth-2411.14394" aria-labelledby="oth-2411.14394">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Securing Legacy Communication Networks via Authenticated Cyclic Redundancy Integrity Check </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lotto,+A">Alessandro Lotto</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Brighente,+A">Alessandro Brighente</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Conti,+M">Mauro Conti</a></div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span> </div> <p class='mathjax'> Integrating modern communication technologies into legacy systems, such as Industrial Control Systems and in-vehicle networks, invalidates the assumptions of isolated and trusted operating environments. Security incidents like the 2015 Ukraine power grid attack and the 2021 compromise of a U.S. water treatment facility demonstrate how increased interconnectivity, paired with insufficient security measures, expose these critical systems to cyber threats, posing risks to national and public safety. These attacks were favored by the lack of proper message authentication, highlighting its importance as a primary countermeasure to enhance system security. Solutions proposed in the literature remain largely unadopted in practice due to challenges such as preserving backward compatibility, additional hardware requirements, and limited computational resources on legacy devices. Moreover, many solutions are protocol-specific, necessitating complex and costly multiple implementations in heterogeneous systems. <br>In this paper, we propose Authenticated Cyclic Redundancy Integrity Check (ACRIC), a novel security mechanism that overcomes these limitations by leveraging a cryptographic computation of the existing Cyclic Redundancy Check (CRC) field to ensure message integrity protection and authentication. ACRIC preserves backward compatibility without requiring additional hardware and is protocol agnostic. This makes it applicable across various systems, suitable for diverse legacy network protocols including point-to-point and broadcast communications. Experimental results, supported by formal verification and real-world testing, demonstrate that ACRIC offers robust security with minimal transmission overhead (&lt;&lt; 1 ms). This proves ACRIC's practicality, cost-effectiveness, and suitability for real-world adoption. 
</p> </div> </dd> <dt> <a name='item343'>[343]</a> <a href ="/abs/2411.14398" title="Abstract" id="2411.14398"> arXiv:2411.14398 </a> [<a href="/pdf/2411.14398" title="Download PDF" id="pdf-2411.14398" aria-labelledby="pdf-2411.14398">pdf</a>, <a href="https://arxiv.org/html/2411.14398v1" title="View HTML" id="html-2411.14398" aria-labelledby="html-2411.14398" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14398" title="Other formats" id="oth-2411.14398" aria-labelledby="oth-2411.14398">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Lightweight Safety Guardrails Using Fine-tuned BERT Embeddings </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+A">Aaron Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rana,+M">Mansi Rana</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stolcke,+A">Andreas Stolcke</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> To appear in Proceedings of COLING 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> With the recent proliferation of large language models (LLMs), enterprises have been able to rapidly develop proof-of-concepts and prototypes. As a result, there is a growing need to implement robust guardrails that monitor, quantize and control an LLM's behavior, ensuring that the use is reliable, safe, accurate and also aligned with the users' expectations. Previous approaches for filtering out inappropriate user prompts or system outputs, such as LlamaGuard and OpenAI's MOD API, have achieved significant success by fine-tuning existing LLMs. 
However, using fine-tuned LLMs as guardrails introduces increased latency and higher maintenance costs, which may not be practical or scalable for cost-efficient deployments. We take a different approach, focusing on fine-tuning a lightweight architecture: Sentence-BERT. This method reduces the model size from LlamaGuard's 7 billion parameters to approximately 67 million, while maintaining comparable performance on the AEGIS safety benchmark. </p> </div> </dd> <dt> <a name='item344'>[344]</a> <a href ="/abs/2411.14399" title="Abstract" id="2411.14399"> arXiv:2411.14399 </a> [<a href="/pdf/2411.14399" title="Download PDF" id="pdf-2411.14399" aria-labelledby="pdf-2411.14399">pdf</a>, <a href="https://arxiv.org/html/2411.14399v1" title="View HTML" id="html-2411.14399" aria-labelledby="html-2411.14399" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14399" title="Other formats" id="oth-2411.14399" aria-labelledby="oth-2411.14399">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DiscoTEX 1.0: Discontinuous collocation and implicit-turned-explicit (IMTEX) integration symplectic, symmetric numerical algorithms with high order jumps for differential equations II: extension to higher-orders of numerical convergence </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Da+Silva,+L+J+G">Lidia J. Gomes Da Silva</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 15 pages, 5 figures, 2 tables. Second paper of a series of papers. See <a href="http://gr.qc" rel="external noopener nofollow" class="link-external link-http">this http URL</a>:<a href="https://arxiv.org/abs/2401.08758" data-arxiv-id="2401.08758" class="link-https">2401.08758</a> for application of these algorithms to numerical black hole perturbation theory. Comments welcomed. 
arXiv admin note: text overlap with <a href="https://arxiv.org/abs/2401.08758" data-arxiv-id="2401.08758" class="link-https">arXiv:2401.08758</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span>; Instrumentation and Methods for Astrophysics (astro-ph.IM); General Relativity and Quantum Cosmology (gr-qc); Mathematical Physics (math-ph) </div> <p class='mathjax'> \texttt{DiscoTEX} is a highly accurate numerical algorithm for computing numerical weak-form solutions to distributionally sourced partial differential equations (PDE)s. The aim of this second paper, succeeding \cite{da2024discotex}, is to present its extension up to twelve orders. This will be demonstrated by computing numerical weak-form solutions to the distributionally sourced wave equation and comparing it to its exact solutions. The full details of the numerical scheme at higher orders will be presented. </p> </div> </dd> <dt> <a name='item345'>[345]</a> <a href ="/abs/2411.14400" title="Abstract" id="2411.14400"> arXiv:2411.14400 </a> [<a href="/pdf/2411.14400" title="Download PDF" id="pdf-2411.14400" aria-labelledby="pdf-2411.14400">pdf</a>, <a href="https://arxiv.org/html/2411.14400v1" title="View HTML" id="html-2411.14400" aria-labelledby="html-2411.14400" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14400" title="Other formats" id="oth-2411.14400" aria-labelledby="oth-2411.14400">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> 23 DoF Grasping Policies from a Raw Point Cloud </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Matak,+M">Martin Matak</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Van+Wyk,+K">Karl Van Wyk</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hermans,+T">Tucker Hermans</a></div> <div class='list-comments 
mathjax'><span class='descriptor'>Comments:</span> IEEE International Conference on Robotics and Automation (ICRA) Workshop on Geometric Representations 2023 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Coordinating the motion of robots with high degrees of freedom (DoF) to grasp objects gives rise to many challenges. In this paper, we propose a novel imitation learning approach to learn a policy that directly predicts 23 DoF grasp trajectories from a partial point cloud provided by a single, fixed camera. At the core of the approach is a second-order geometric-based model of behavioral dynamics. This Neural Geometric Fabric (NGF) policy predicts accelerations directly in joint space. We show that our policy is capable of generalizing to novel objects, and combine our policy with a geometric fabric motion planner in a loop to generate stable grasping trajectories. We evaluate our approach on a set of three different objects, compare different policy structures, and run ablation studies to understand the importance of different object encodings for policy learning. 
</p> </div> </dd> <dt> <a name='item346'>[346]</a> <a href ="/abs/2411.14401" title="Abstract" id="2411.14401"> arXiv:2411.14401 </a> [<a href="/pdf/2411.14401" title="Download PDF" id="pdf-2411.14401" aria-labelledby="pdf-2411.14401">pdf</a>, <a href="https://arxiv.org/html/2411.14401v1" title="View HTML" id="html-2411.14401" aria-labelledby="html-2411.14401" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14401" title="Other formats" id="oth-2411.14401" aria-labelledby="oth-2411.14401">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Beyond Training: Dynamic Token Merging for Zero-Shot Video Understanding </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yiming Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Z">Zhuokai Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Z">Zhaorun Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ding,+Z">Zenghui Ding</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+X">Xianjun Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+Y">Yining Sun</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Recent advancements in multimodal large language models (MLLMs) have opened new avenues for video understanding. However, achieving high fidelity in zero-shot video tasks remains challenging. Traditional video processing methods rely heavily on fine-tuning to capture nuanced spatial-temporal details, which incurs significant data and computation costs. In contrast, training-free approaches, though efficient, often lack robustness in preserving context-rich features across complex video content. 
To this end, we propose DYTO, a novel dynamic token merging framework for zero-shot video understanding that adaptively optimizes token efficiency while preserving crucial scene details. DYTO integrates a hierarchical frame selection and a bipartite token merging strategy to dynamically cluster key frames and selectively compress token sequences, striking a balance between computational efficiency with semantic richness. Extensive experiments across multiple benchmarks demonstrate the effectiveness of DYTO, achieving superior performance compared to both fine-tuned and training-free methods and setting a new state-of-the-art for zero-shot video understanding. </p> </div> </dd> <dt> <a name='item347'>[347]</a> <a href ="/abs/2411.14402" title="Abstract" id="2411.14402"> arXiv:2411.14402 </a> [<a href="/pdf/2411.14402" title="Download PDF" id="pdf-2411.14402" aria-labelledby="pdf-2411.14402">pdf</a>, <a href="/format/2411.14402" title="Other formats" id="oth-2411.14402" aria-labelledby="oth-2411.14402">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multimodal Autoregressive Pre-training of Large Vision Encoders </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Fini,+E">Enrico Fini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shukor,+M">Mustafa Shukor</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xiujun Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dufter,+P">Philipp Dufter</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Klein,+M">Michal Klein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Haldimann,+D">David Haldimann</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Aitharaju,+S">Sai Aitharaju</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=da+Costa,+V+G+T">Victor Guilherme Turrisi da Costa</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=B%C3%A9thune,+L">Louis Béthune</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gan,+Z">Zhe Gan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Toshev,+A+T">Alexander T Toshev</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Eichner,+M">Marcin Eichner</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nabi,+M">Moin Nabi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Y">Yinfei Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Susskind,+J+M">Joshua M. Susskind</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=El-Nouby,+A">Alaaeldin El-Nouby</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> <a href="https://github.com/apple/ml-aim" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> We introduce a novel method for pre-training of large-scale vision encoders. Building on recent advancements in autoregressive pre-training of vision models, we extend this framework to a multimodal setting, i.e., images and text. In this paper, we present AIMV2, a family of generalist vision encoders characterized by a straightforward pre-training process, scalability, and remarkable performance across a range of downstream tasks. This is achieved by pairing the vision encoder with a multimodal decoder that autoregressively generates raw image patches and text tokens. Our encoders excel not only in multimodal evaluations but also in vision benchmarks such as localization, grounding, and classification. Notably, our AIMV2-3B encoder achieves 89.5% accuracy on ImageNet-1k with a frozen trunk. 
Furthermore, AIMV2 consistently outperforms state-of-the-art contrastive models (e.g., CLIP, SigLIP) in multimodal image understanding across diverse settings. </p> </div> </dd> <dt> <a name='item348'>[348]</a> <a href ="/abs/2411.14403" title="Abstract" id="2411.14403"> arXiv:2411.14403 </a> [<a href="/pdf/2411.14403" title="Download PDF" id="pdf-2411.14403" aria-labelledby="pdf-2411.14403">pdf</a>, <a href="https://arxiv.org/html/2411.14403v1" title="View HTML" id="html-2411.14403" aria-labelledby="html-2411.14403" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14403" title="Other formats" id="oth-2411.14403" aria-labelledby="oth-2411.14403">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Landing Trajectory Prediction for UAS Based on Generative Adversarial Network </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xiang,+J">Jun Xiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Essick,+D">Drake Essick</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bautista,+L+G">Luiz Gonzalez Bautista</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+J">Junfei Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jun Chen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages, AIAA SCITECH 2023 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Models for trajectory prediction are an essential component of many advanced air mobility studies. These models help aircraft detect conflict and plan avoidance maneuvers, which is especially important in Unmanned Aircraft systems (UAS) landing management due to the congested airspace near vertiports. 
In this paper, we propose a landing trajectory prediction model for UAS based on Generative Adversarial Network (GAN). The GAN is a prestigious neural network that has been developed for many years. In previous research, GAN has achieved many state-of-the-art results in many generation tasks. The GAN consists of one neural network generator and a neural network discriminator. Because of the learning capacity of the neural networks, the generator is capable of understanding the features of the sample trajectory. The generator takes the previous trajectory as input and outputs some random status of a flight. According to the results of the experiments, the proposed model can output more accurate predictions than the baseline method (GMR) in various datasets. To evaluate the proposed model, we also create a real UAV landing dataset that includes more than 2600 trajectories of drones controlled manually by real pilots. </p> </div> </dd> <dt> <a name='item349'>[349]</a> <a href ="/abs/2411.14404" title="Abstract" id="2411.14404"> arXiv:2411.14404 </a> [<a href="/pdf/2411.14404" title="Download PDF" id="pdf-2411.14404" aria-labelledby="pdf-2411.14404">pdf</a>, <a href="https://arxiv.org/html/2411.14404v1" title="View HTML" id="html-2411.14404" aria-labelledby="html-2411.14404" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14404" title="Other formats" id="oth-2411.14404" aria-labelledby="oth-2411.14404">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Resolving Multiple-Dynamic Model Uncertainty in Hypothesis-Driven Belief-MDPs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Dagan,+O">Ofer Dagan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Becker,+T">Tyler Becker</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sunberg,+Z+N">Zachary N. 
Sunberg</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 4 figures, submitted to AAMAS 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Robotics (cs.RO) </div> <p class='mathjax'> When human operators of cyber-physical systems encounter surprising behavior, they often consider multiple hypotheses that might explain it. In some cases, taking information-gathering actions such as additional measurements or control inputs given to the system can help resolve uncertainty and determine the most accurate hypothesis. The task of optimizing these actions can be formulated as a belief-space Markov decision process that we call a hypothesis-driven belief MDP. Unfortunately, this problem suffers from the curse of history similar to a partially observable Markov decision process (POMDP). To plan in continuous domains, an agent needs to reason over countlessly many possible action-observation histories, each resulting in a different belief over the unknown state. The problem is exacerbated in the hypothesis-driven context because each action-observation pair spawns a different belief for each hypothesis, leading to additional branching. This paper considers the case in which each hypothesis corresponds to a different dynamic model in an underlying POMDP. We present a new belief MDP formulation that: (i) enables reasoning over multiple hypotheses, (ii) balances the goals of determining the (most likely) correct hypothesis and performing well in the underlying POMDP, and (iii) can be solved with sparse tree search. 
</p> </div> </dd> <dt> <a name='item350'>[350]</a> <a href ="/abs/2411.14405" title="Abstract" id="2411.14405"> arXiv:2411.14405 </a> [<a href="/pdf/2411.14405" title="Download PDF" id="pdf-2411.14405" aria-labelledby="pdf-2411.14405">pdf</a>, <a href="https://arxiv.org/html/2411.14405v1" title="View HTML" id="html-2411.14405" aria-labelledby="html-2411.14405" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14405" title="Other formats" id="oth-2411.14405" aria-labelledby="oth-2411.14405">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Marco-o1: Towards Open Reasoning Models for Open-Ended Solutions </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Y">Yu Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+H">Huifeng Yin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+B">Bo Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Hao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+T">Tianqi Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lyu,+C">Chenyang Lyu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+L">Longyue Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+W">Weihua Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+K">Kaifu Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> Currently OpenAI o1 has sparked a surge of interest in the study of large reasoning models (LRM). 
Building on this momentum, Marco-o1 not only focuses on disciplines with standard answers, such as mathematics, physics, and coding -- which are well-suited for reinforcement learning (RL) -- but also places greater emphasis on open-ended resolutions. We aim to address the question: "Can the o1 model effectively generalize to broader domains where clear standards are absent and rewards are challenging to quantify?" Marco-o1 is powered by Chain-of-Thought (CoT) fine-tuning, Monte Carlo Tree Search (MCTS), reflection mechanisms, and innovative reasoning strategies -- optimized for complex real-world problem-solving tasks. </p> </div> </dd> <dt> <a name='item351'>[351]</a> <a href ="/abs/2411.14409" title="Abstract" id="2411.14409"> arXiv:2411.14409 </a> [<a href="/pdf/2411.14409" title="Download PDF" id="pdf-2411.14409" aria-labelledby="pdf-2411.14409">pdf</a>, <a href="https://arxiv.org/html/2411.14409v1" title="View HTML" id="html-2411.14409" aria-labelledby="html-2411.14409" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14409" title="Other formats" id="oth-2411.14409" aria-labelledby="oth-2411.14409">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Inexact Generalized Golub-Kahan Methods for Large-Scale Bayesian Inverse Problems </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Bu,+Y">Yutong Bu</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Chung,+J">Julianne Chung</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span> </div> <p class='mathjax'> Solving large-scale Bayesian inverse problems presents significant challenges, particularly when the exact (discretized) forward operator is unavailable. 
These challenges often arise in image processing tasks due to unknown defects in the forward process that may result in varying degrees of inexactness in the forward model. Moreover, for many large-scale problems, computing the square root or inverse of the prior covariance matrix is infeasible such as when the covariance kernel is defined on irregular grids or is accessible only through matrix-vector products. This paper introduces an efficient approach by developing an inexact generalized Golub-Kahan decomposition that can incorporate varying degrees of inexactness in the forward model to solve large-scale generalized Tikhonov regularized problems. Further, a hybrid iterative projection scheme is developed to automatically select Tikhonov regularization parameters. Numerical experiments on simulated tomography reconstructions demonstrate the stability and effectiveness of this novel hybrid approach. </p> </div> </dd> <dt> <a name='item352'>[352]</a> <a href ="/abs/2411.14411" title="Abstract" id="2411.14411"> arXiv:2411.14411 </a> [<a href="/pdf/2411.14411" title="Download PDF" id="pdf-2411.14411" aria-labelledby="pdf-2411.14411">pdf</a>, <a href="https://arxiv.org/html/2411.14411v1" title="View HTML" id="html-2411.14411" aria-labelledby="html-2411.14411" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14411" title="Other formats" id="oth-2411.14411" aria-labelledby="oth-2411.14411">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi-Agent Environments for Vehicle Routing Problems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gama,+R">Ricardo Gama</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fuertes,+D">Daniel Fuertes</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=del-Blanco,+C+R">Carlos R. 
del-Blanco</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fernandes,+H+L">Hugo L. Fernandes</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Multiagent Systems (cs.MA) </div> <p class='mathjax'> Research on Reinforcement Learning (RL) approaches for discrete optimization problems has increased considerably, extending RL to an area classically dominated by Operations Research (OR). Vehicle routing problems are a good example of discrete optimization problems with high practical relevance where RL techniques have had considerable success. Despite these advances, open-source development frameworks remain scarce, hampering both the testing of algorithms and the ability to objectively compare results. This ultimately slows down progress in the field and limits the exchange of ideas between the RL and OR communities. <br>Here we propose a library composed of multi-agent environments that simulates classic vehicle routing problems. The library, built on PyTorch, provides a flexible modular architecture design that allows easy customization and incorporation of new routing problems. It follows the Agent Environment Cycle ("AEC") games model and has an intuitive API, enabling rapid adoption and easy integration into existing reinforcement learning frameworks. <br>The library allows for a straightforward use of classical OR benchmark instances in order to narrow the gap between the test beds for algorithm benchmarking used by the RL and OR communities. Additionally, we provide benchmark instance sets for each environment, as well as baseline RL models and training code. 
</p> </div> </dd> <dt> <a name='item353'>[353]</a> <a href ="/abs/2411.14420" title="Abstract" id="2411.14420"> arXiv:2411.14420 </a> [<a href="/pdf/2411.14420" title="Download PDF" id="pdf-2411.14420" aria-labelledby="pdf-2411.14420">pdf</a>, <a href="/format/2411.14420" title="Other formats" id="oth-2411.14420" aria-labelledby="oth-2411.14420">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Aggregating Funnels for Faster Fetch&amp;Add and Queues </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Roh,+Y">Younghun Roh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+Y">Yuanhao Wei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ruppert,+E">Eric Ruppert</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fatourou,+P">Panagiota Fatourou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jayanti,+S">Siddhartha Jayanti</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shun,+J">Julian Shun</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This is the full version of the paper appearing in PPoPP 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Distributed, Parallel, and Cluster Computing (cs.DC)</span> </div> <p class='mathjax'> Many concurrent algorithms require processes to perform fetch-and-add operations on a single memory location, which can be a hot spot of contention. We present a novel algorithm called Aggregating Funnels that reduces this contention by spreading the fetch-and-add operations across multiple memory locations. 
It aggregates fetch-and-add operations into batches so that the batch can be performed by a single hardware fetch-and-add instruction on one location and all operations in the batch can efficiently compute their results by performing a fetch-and-add instruction on a different location. We show experimentally that this approach achieves higher throughput than previous combining techniques, such as Combining Funnels, and is substantially more scalable than applying hardware fetch-and-add instructions on a single memory location. We show that replacing the fetch-and-add instructions in the fastest state-of-the-art concurrent queue by our Aggregating Funnels eliminates a bottleneck and greatly improves the queue's overall throughput. </p> </div> </dd> <dt> <a name='item354'>[354]</a> <a href ="/abs/2411.14421" title="Abstract" id="2411.14421"> arXiv:2411.14421 </a> [<a href="/pdf/2411.14421" title="Download PDF" id="pdf-2411.14421" aria-labelledby="pdf-2411.14421">pdf</a>, <a href="https://arxiv.org/html/2411.14421v1" title="View HTML" id="html-2411.14421" aria-labelledby="html-2411.14421" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14421" title="Other formats" id="oth-2411.14421" aria-labelledby="oth-2411.14421">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> From RNNs to Foundation Models: An Empirical Study on Commercial Building Energy Consumption </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bose,+S">Shourya Bose</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yijiang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Van+Sant,+A">Amy Van Sant</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yu Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+K">Kibaek Kim</a></div> <div class='list-comments mathjax'><span 
class='descriptor'>Comments:</span> NeurIPS 2024 Workshop on Time Series in the Age of Large Models </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Accurate short-term energy consumption forecasting for commercial buildings is crucial for smart grid operations. While smart meters and deep learning models enable forecasting using past data from multiple buildings, data heterogeneity from diverse buildings can reduce model performance. The impact of increasing dataset heterogeneity in time series forecasting, while keeping size and model constant, is understudied. We tackle this issue using the ComStock dataset, which provides synthetic energy consumption data for U.S. commercial buildings. Two curated subsets, identical in size and region but differing in building type diversity, are used to assess the performance of various time series forecasting models, including fine-tuned open-source foundation models (FMs). The results show that dataset heterogeneity and model architecture have a greater impact on post-training forecasting performance than the parameter count. Moreover, despite the higher computational cost, fine-tuned FMs demonstrate competitive performance compared to base models trained from scratch. 
</p> </div> </dd> <dt> <a name='item355'>[355]</a> <a href ="/abs/2411.14423" title="Abstract" id="2411.14423"> arXiv:2411.14423 </a> [<a href="/pdf/2411.14423" title="Download PDF" id="pdf-2411.14423" aria-labelledby="pdf-2411.14423">pdf</a>, <a href="https://arxiv.org/html/2411.14423v1" title="View HTML" id="html-2411.14423" aria-labelledby="html-2411.14423" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14423" title="Other formats" id="oth-2411.14423" aria-labelledby="oth-2411.14423">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unleashing the Potential of Multi-modal Foundation Models and Video Diffusion for 4D Dynamic Physical Scene Simulation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zhuoman Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ye,+W">Weicai Ye</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luximon,+Y">Yan Luximon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wan,+P">Pengfei Wan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+D">Di Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Homepage: <a href="https://zhuomanliu.github.io/PhysFlow/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Realistic simulation of dynamic scenes requires accurately capturing diverse material properties and modeling complex object interactions grounded in physical principles. However, existing methods are constrained to basic material types with limited predictable parameters, making them insufficient to represent the complexity of real-world materials. 
We introduce a novel approach that leverages multi-modal foundation models and video diffusion to achieve enhanced 4D dynamic scene simulation. Our method utilizes multi-modal models to identify material types and initialize material parameters through image queries, while simultaneously inferring 3D Gaussian splats for detailed scene representation. We further refine these material parameters using video diffusion with a differentiable Material Point Method (MPM) and optical flow guidance rather than render loss or Score Distillation Sampling (SDS) loss. This integrated framework enables accurate prediction and realistic simulation of dynamic interactions in real-world scenarios, advancing both accuracy and flexibility in physics-based simulations. </p> </div> </dd> <dt> <a name='item356'>[356]</a> <a href ="/abs/2411.14424" title="Abstract" id="2411.14424"> arXiv:2411.14424 </a> [<a href="/pdf/2411.14424" title="Download PDF" id="pdf-2411.14424" aria-labelledby="pdf-2411.14424">pdf</a>, <a href="https://arxiv.org/html/2411.14424v1" title="View HTML" id="html-2411.14424" aria-labelledby="html-2411.14424" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14424" title="Other formats" id="oth-2411.14424" aria-labelledby="oth-2411.14424">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning Fair Robustness via Domain Mixup </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhong,+M">Meiyu Zhong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tandon,+R">Ravi Tandon</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Cryptography and Security (cs.CR); Computers and Society (cs.CY) </div> <p class='mathjax'> Adversarial training is one of the predominant techniques for training classifiers that are robust to adversarial attacks. 
Recent work, however, has found that adversarial training, which makes the overall classifier robust, does not necessarily provide an equal amount of robustness for all classes. In this paper, we propose the use of mixup for the problem of learning fair robust classifiers, which can provide similar robustness across all classes. Specifically, the idea is to mix inputs from the same classes and perform adversarial training on mixed up inputs. We present a theoretical analysis of this idea for the case of linear classifiers and show that mixup combined with adversarial training can provably reduce the class-wise robustness disparity. This method not only contributes to reducing the disparity in class-wise adversarial risk, but also the class-wise natural risk. Complementing our theoretical analysis, we also provide experimental results on both synthetic data and the real-world dataset (CIFAR-10), which show improvement in class-wise disparities for both natural and adversarial risks. </p> </div> </dd> <dt> <a name='item357'>[357]</a> <a href ="/abs/2411.14425" title="Abstract" id="2411.14425"> arXiv:2411.14425 </a> [<a href="/pdf/2411.14425" title="Download PDF" id="pdf-2411.14425" aria-labelledby="pdf-2411.14425">pdf</a>, <a href="https://arxiv.org/html/2411.14425v1" title="View HTML" id="html-2411.14425" aria-labelledby="html-2411.14425" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14425" title="Other formats" id="oth-2411.14425" aria-labelledby="oth-2411.14425">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Whack-a-Chip: The Futility of Hardware-Centric Export Controls </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gupta,+R">Ritwik Gupta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Walker,+L">Leah Walker</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Reddie,+A+W">Andrew W. 
Reddie</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computers and Society (cs.CY)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> U.S. export controls on semiconductors are widely known to be permeable, with the People's Republic of China (PRC) steadily creating state-of-the-art artificial intelligence (AI) models with exfiltrated chips. This paper presents the first concrete, public evidence of how leading PRC AI labs evade and circumvent U.S. export controls. We examine how Chinese companies, notably Tencent, are not only using chips that are restricted under U.S. export controls but are also finding ways to circumvent these regulations by using software and modeling techniques that maximize less capable hardware. Specifically, we argue that Tencent's ability to power its Hunyuan-Large model with non-export controlled NVIDIA H20s exemplifies broader gains in efficiency in machine learning that have eroded the moat that the United States initially built via its existing export controls. Finally, we examine the implications of this finding for the future of the United States' export control strategy. 
</p> </div> </dd> <dt> <a name='item358'>[358]</a> <a href ="/abs/2411.14427" title="Abstract" id="2411.14427"> arXiv:2411.14427 </a> [<a href="/pdf/2411.14427" title="Download PDF" id="pdf-2411.14427" aria-labelledby="pdf-2411.14427">pdf</a>, <a href="https://arxiv.org/html/2411.14427v1" title="View HTML" id="html-2411.14427" aria-labelledby="html-2411.14427" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14427" title="Other formats" id="oth-2411.14427" aria-labelledby="oth-2411.14427">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Transformer-based Heuristic for Advanced Air Mobility Planning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xiang,+J">Jun Xiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jun Chen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 2024 AIAA DATC/IEEE 43rd Digital Avionics Systems Conference (DASC) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Safety is extremely important for urban flights of autonomous Unmanned Aerial Vehicles (UAVs). Risk-aware path planning is one of the most effective methods to guarantee the safety of UAVs. This type of planning can be represented as a Constrained Shortest Path (CSP) problem, which seeks to find the shortest route that meets a predefined safety constraint. Solving CSP problems is NP-hard, presenting significant computational challenges. Although traditional methods can accurately solve CSP problems, they tend to be very slow. Previously, we introduced an additional safety dimension to the traditional A* algorithm, known as ASD A*, to effectively handle Constrained Shortest Path (CSP) problems. 
Then, we developed a custom learning-based heuristic using transformer-based neural networks, which significantly reduced computational load and enhanced the performance of the ASD A* algorithm. In this paper, we expand our dataset to include more risk maps and tasks, improve the proposed model, and increase its performance. We also introduce a new heuristic strategy and a novel neural network, which enhance the overall effectiveness of our approach. </p> </div> </dd> <dt> <a name='item359'>[359]</a> <a href ="/abs/2411.14429" title="Abstract" id="2411.14429"> arXiv:2411.14429 </a> [<a href="/pdf/2411.14429" title="Download PDF" id="pdf-2411.14429" aria-labelledby="pdf-2411.14429">pdf</a>, <a href="https://arxiv.org/html/2411.14429v1" title="View HTML" id="html-2411.14429" aria-labelledby="html-2411.14429" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14429" title="Other formats" id="oth-2411.14429" aria-labelledby="oth-2411.14429">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Revisiting the Integration of Convolution and Attention for Vision Backbone </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+L">Lei Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+X">Xinjiang Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+W">Wayne Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lau,+R+W+H">Rynson W. H. 
Lau</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> NeurIPS 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Convolutions (Convs) and multi-head self-attentions (MHSAs) are typically considered alternatives to each other for building vision backbones. Although some works try to integrate both, they apply the two operators simultaneously at the finest pixel granularity. With Convs responsible for per-pixel feature extraction already, the question is whether we still need to include the heavy MHSAs at such a fine-grained level. In fact, this is the root cause of the scalability issue w.r.t. the input resolution for vision transformers. To address this important problem, we propose in this work to use MHSAs and Convs in parallel \textbf{at different granularity levels} instead. Specifically, in each layer, we use two different ways to represent an image: a fine-grained regular grid and a coarse-grained set of semantic slots. We apply different operations to these two representations: Convs to the grid for local features, and MHSAs to the slots for global features. A pair of fully differentiable soft clustering and dispatching modules is introduced to bridge the grid and set representations, thus enabling local-global fusion. Through extensive experiments on various vision tasks, we empirically verify the potential of the proposed integration scheme, named \textit{GLMix}: by offloading the burden of fine-grained features to light-weight Convs, it is sufficient to use MHSAs in a few (e.g., 64) semantic slots to match the performance of recent state-of-the-art backbones, while being more efficient. 
Our visualization results also demonstrate that the soft clustering module produces a meaningful semantic grouping effect with only IN1k classification supervision, which may induce better interpretability and inspire new weakly-supervised semantic segmentation approaches. Code will be available at <a href="https://github.com/rayleizhu/GLMix" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item360'>[360]</a> <a href ="/abs/2411.14430" title="Abstract" id="2411.14430"> arXiv:2411.14430 </a> [<a href="/pdf/2411.14430" title="Download PDF" id="pdf-2411.14430" aria-labelledby="pdf-2411.14430">pdf</a>, <a href="https://arxiv.org/html/2411.14430v1" title="View HTML" id="html-2411.14430" aria-labelledby="html-2411.14430" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14430" title="Other formats" id="oth-2411.14430" aria-labelledby="oth-2411.14430">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Stable Flow: Vital Layers for Training-Free Image Editing </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Avrahami,+O">Omri Avrahami</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Patashnik,+O">Or Patashnik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fried,+O">Ohad Fried</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nemchinov,+E">Egor Nemchinov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Aberman,+K">Kfir Aberman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lischinski,+D">Dani Lischinski</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cohen-Or,+D">Daniel Cohen-Or</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Project page is available at <a href="https://omriavrahami.com/stable-flow" rel="external noopener 
nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Graphics (cs.GR); Machine Learning (cs.LG) </div> <p class='mathjax'> Diffusion models have revolutionized the field of content synthesis and editing. Recent models have replaced the traditional UNet architecture with the Diffusion Transformer (DiT), and employed flow-matching for improved training and sampling. However, they exhibit limited generation diversity. In this work, we leverage this limitation to perform consistent image edits via selective injection of attention features. The main challenge is that, unlike the UNet-based models, DiT lacks a coarse-to-fine synthesis structure, making it unclear in which layers to perform the injection. Therefore, we propose an automatic method to identify "vital layers" within DiT, crucial for image formation, and demonstrate how these layers facilitate a range of controlled stable edits, from non-rigid modifications to object addition, using the same mechanism. Next, to enable real-image editing, we introduce an improved image inversion method for flow models. Finally, we evaluate our approach through qualitative and quantitative comparisons, along with a user study, and demonstrate its effectiveness across multiple applications. 
The project page is available at <a href="https://omriavrahami.com/stable-flow" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item361'>[361]</a> <a href ="/abs/2411.14431" title="Abstract" id="2411.14431"> arXiv:2411.14431 </a> [<a href="/pdf/2411.14431" title="Download PDF" id="pdf-2411.14431" aria-labelledby="pdf-2411.14431">pdf</a>, <a href="https://arxiv.org/html/2411.14431v1" title="View HTML" id="html-2411.14431" aria-labelledby="html-2411.14431" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14431" title="Other formats" id="oth-2411.14431" aria-labelledby="oth-2411.14431">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On Optimal Testing of Linearity </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Arora,+V">Vipul Arora</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kelman,+E">Esty Kelman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Meir,+U">Uri Meir</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> To appear at SOSA 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computational Complexity (cs.CC)</span>; Data Structures and Algorithms (cs.DS) </div> <p class='mathjax'> Linearity testing has been a focal problem in property testing of functions. We combine different known techniques and observations about linearity testing in order to resolve two recent versions of this task. <br>First, we focus on the online manipulations model introduced by Kalemaj, Raskhodnikova and Varma (ITCS 2022 &amp; Theory of Computing 2023). In this model, up to $t$ data entries are adversarially manipulated after each query is answered. 
Ben-Eliezer, Kelman, Meir, and Raskhodnikova (ITCS 2024) showed an asymptotically optimal linearity tester that is resilient to $t$ manipulations per query, but their approach fails if $t$ is too large. We extend this result, showing an optimal tester for almost any possible value of $t$. First, we simplify their result when $t$ is small, and for larger values of $t$ we instead use sample-based testers, as defined by Goldreich and Ron (ACM Transactions on Computation Theory 2016). A key observation is that sample-based testing is resilient to online manipulations, but still achieves optimal query complexity for linearity when $t$ is large. We complement our result by showing that when $t$ is <em>very</em> large, any reasonable property, and in particular linearity, cannot be tested at all. <br>Second, we consider linearity over the reals with proximity parameter $\varepsilon$. Fleming and Yoshida (ITCS 2020) gave a tester using $O(1/\varepsilon \cdot \log(1/\varepsilon))$ queries. We simplify their algorithms and modify the analysis accordingly, showing an optimal tester that only uses $O(1/\varepsilon)$ queries. This modification works for the low-degree testers presented in Arora, Bhattacharyya, Fleming, Kelman, and Yoshida (SODA 2023) as well, resulting in optimal testers for degree-$d$ polynomials, for any constant degree $d$. 
</p> </div> </dd> <dt> <a name='item362'>[362]</a> <a href ="/abs/2411.14432" title="Abstract" id="2411.14432"> arXiv:2411.14432 </a> [<a href="/pdf/2411.14432" title="Download PDF" id="pdf-2411.14432" aria-labelledby="pdf-2411.14432">pdf</a>, <a href="https://arxiv.org/html/2411.14432v1" title="View HTML" id="html-2411.14432" aria-labelledby="html-2411.14432" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14432" title="Other formats" id="oth-2411.14432" aria-labelledby="oth-2411.14432">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Insight-V: Exploring Long-Chain Visual Reasoning with Multimodal Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Dong,+Y">Yuhao Dong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zuyan Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+H">Hai-Long Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+J">Jingkang Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+W">Winston Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rao,+Y">Yongming Rao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Ziwei Liu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Large Language Models (LLMs) demonstrate enhanced capabilities and reliability by reasoning more, evolving from Chain-of-Thought prompting to product-level solutions like OpenAI o1. Despite various efforts to improve LLM reasoning, high-quality long-chain reasoning data and optimized training pipelines still remain inadequately explored in vision-language tasks. 
In this paper, we present Insight-V, an early effort to 1) scalably produce long and robust reasoning data for complex multi-modal tasks, and 2) an effective training pipeline to enhance the reasoning capabilities of multi-modal large language models (MLLMs). Specifically, to create long and structured reasoning data without human labor, we design a two-step pipeline with a progressive strategy to generate sufficiently long and diverse reasoning paths and a multi-granularity assessment method to ensure data quality. We observe that directly supervising MLLMs with such long and complex reasoning data will not yield ideal reasoning ability. To tackle this problem, we design a multi-agent system consisting of a reasoning agent dedicated to performing long-chain reasoning and a summary agent trained to judge and summarize reasoning results. We further incorporate an iterative DPO algorithm to enhance the reasoning agent's generation stability and quality. Based on the popular LLaVA-NeXT model and our stronger base MLLM, we demonstrate significant performance gains across challenging multi-modal benchmarks requiring visual reasoning. Benefiting from our multi-agent system, Insight-V can also easily maintain or improve performance on perception-focused multi-modal tasks. 
</p> </div> </dd> </dl> <dl id='articles'> <h3>Cross submissions (showing 60 of 60 entries)</h3> <dt> <a name='item363'>[363]</a> <a href ="/abs/2210.08868" title="Abstract" id="2210.08868"> arXiv:2210.08868 </a> (cross-list from eess.IV) [<a href="/pdf/2210.08868" title="Download PDF" id="pdf-2210.08868" aria-labelledby="pdf-2210.08868">pdf</a>, <a href="/format/2210.08868" title="Other formats" id="oth-2210.08868" aria-labelledby="oth-2210.08868">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Cerebrovascular Segmentation via Vessel Oriented Filtering Network </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Guo,+Z">Zhanqiang Guo</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Luan,+Y">Yao Luan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Feng,+J">Jianjiang Feng</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lu,+W">Wangsheng Lu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yin,+Y">Yin Yin</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yang,+G">Guangming Yang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhou,+J">Jie Zhou</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Accurate cerebrovascular segmentation from Magnetic Resonance Angiography (MRA) and Computed Tomography Angiography (CTA) is of great significance in diagnosis and treatment of cerebrovascular pathology. Due to the complexity and topology variability of blood vessels, complete and accurate segmentation of vascular network is still a challenge. 
In this paper, we proposed a Vessel Oriented Filtering Network (VOF-Net) which embeds domain knowledge into the convolutional neural network. We design oriented filters for blood vessels according to vessel orientation field, which is obtained by orientation estimation network. Features extracted by oriented filtering are injected into segmentation network, so as to make use of the prior information that the blood vessels are slender and curved tubular structure. Experimental results on datasets of CTA and MRA show that the proposed method is effective for vessel segmentation, and embedding the specific vascular filter improves the segmentation performance. </p> </div> </dd> <dt> <a name='item364'>[364]</a> <a href ="/abs/2411.13512" title="Abstract" id="2411.13512"> arXiv:2411.13512 </a> (cross-list from cond-mat.dis-nn) [<a href="/pdf/2411.13512" title="Download PDF" id="pdf-2411.13512" aria-labelledby="pdf-2411.13512">pdf</a>, <a href="https://arxiv.org/html/2411.13512v1" title="View HTML" id="html-2411.13512" aria-labelledby="html-2411.13512" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13512" title="Other formats" id="oth-2411.13512" aria-labelledby="oth-2411.13512">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Dyson Brownian motion and random matrix dynamics of weight matrices during learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Aarts,+G">Gert Aarts</a> (Swansea University), <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Hajizadeh,+O">Ouraman Hajizadeh</a> (Graz), <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Lucini,+B">Biagio Lucini</a> (Swansea University), <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Park,+C">Chanju Park</a> (Swansea University)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7 pages. 
Contribution accepted in the NeurIPS 2024 workshop "Machine Learning and the Physical Sciences" </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Disordered Systems and Neural Networks (cond-mat.dis-nn)</span>; Machine Learning (cs.LG); High Energy Physics - Lattice (hep-lat) </div> <p class='mathjax'> During training, weight matrices in machine learning architectures are updated using stochastic gradient descent or variations thereof. In this contribution we employ concepts of random matrix theory to analyse the resulting stochastic matrix dynamics. We first demonstrate that the dynamics can generically be described using Dyson Brownian motion, leading to e.g. eigenvalue repulsion. The level of stochasticity is shown to depend on the ratio of the learning rate and the mini-batch size, explaining the empirically observed linear scaling rule. We verify this linear scaling in the restricted Boltzmann machine. Subsequently we study weight matrix dynamics in transformers (a nano-GPT), following the evolution from a Marchenko-Pastur distribution for eigenvalues at initialisation to a combination with additional structure at the end of learning. 
</p> </div> </dd> <dt> <a name='item365'>[365]</a> <a href ="/abs/2411.13559" title="Abstract" id="2411.13559"> arXiv:2411.13559 </a> (cross-list from q-fin.TR) [<a href="/pdf/2411.13559" title="Download PDF" id="pdf-2411.13559" aria-labelledby="pdf-2411.13559">pdf</a>, <a href="/format/2411.13559" title="Other formats" id="oth-2411.13559" aria-labelledby="oth-2411.13559">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Composing Ensembles of Instrument-Model Pairs for Optimizing Profitability in Algorithmic Trading </div> <div class='list-authors'><a href="https://arxiv.org/search/q-fin?searchtype=author&query=Hassanizorgabad,+S">Sahand Hassanizorgabad</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Trading and Market Microstructure (q-fin.TR)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Financial markets are nonlinear with complexity, where different types of assets are traded between buyers and sellers, each having a view to maximize their Return on Investment (ROI). Forecasting market trends is a challenging task since various factors like stock-specific news, company profiles, public sentiments, and global economic conditions influence them. This paper describes a daily price directional predictive system of financial instruments, addressing the difficulty of predicting short-term price movements. This paper will introduce the development of a novel trading system methodology by proposing a two-layer Composing Ensembles architecture, optimized through grid search, to predict whether the price will rise or fall the next day. This strategy was back-tested on a wide range of financial instruments and time frames, demonstrating an improvement of 20% over the benchmark, representing a standard investment strategy. 
</p> </div> </dd> <dt> <a name='item366'>[366]</a> <a href ="/abs/2411.13562" title="Abstract" id="2411.13562"> arXiv:2411.13562 </a> (cross-list from q-fin.ST) [<a href="/pdf/2411.13562" title="Download PDF" id="pdf-2411.13562" aria-labelledby="pdf-2411.13562">pdf</a>, <a href="/format/2411.13562" title="Other formats" id="oth-2411.13562" aria-labelledby="oth-2411.13562">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The Role of AI in Financial Forecasting: ChatGPT's Potential and Challenges </div> <div class='list-authors'><a href="https://arxiv.org/search/q-fin?searchtype=author&query=Bi,+S">Shuochen Bi</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Deng,+T">Tingting Deng</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Xiao,+J">Jue Xiao</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7 pages, 4 figures, 3 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistical Finance (q-fin.ST)</span>; Artificial Intelligence (cs.AI); Computers and Society (cs.CY) </div> <p class='mathjax'> The outlook for the future of artificial intelligence (AI) in the financial sector, especially in financial forecasting, the challenges and implications. The dynamics of AI technology, including deep learning, reinforcement learning, and integration with blockchain and the Internet of Things, also highlight the continued improvement in data processing capabilities. Explore how AI is reshaping financial services with precisely tailored services that can more precisely meet the diverse needs of individual investors. The integration of AI challenges regulatory and ethical issues in the financial sector, as well as the implications for data privacy protection. 
Analyze the limitations of current AI technology in financial forecasting and its potential impact on the future financial industry landscape, including changes in the job market, the emergence of new financial institutions, and user interface innovations. Emphasizing the importance of increasing investor understanding and awareness of AI and looking ahead to future trends in AI tools for user experience to drive wider adoption of AI in financial decision making. The huge potential, challenges, and future directions of AI in the financial sector highlight the critical role of AI technology in driving transformation and innovation in the financial sector </p> </div> </dd> <dt> <a name='item367'>[367]</a> <a href ="/abs/2411.13564" title="Abstract" id="2411.13564"> arXiv:2411.13564 </a> (cross-list from q-fin.ST) [<a href="/pdf/2411.13564" title="Download PDF" id="pdf-2411.13564" aria-labelledby="pdf-2411.13564">pdf</a>, <a href="https://arxiv.org/html/2411.13564v1" title="View HTML" id="html-2411.13564" aria-labelledby="html-2411.13564" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13564" title="Other formats" id="oth-2411.13564" aria-labelledby="oth-2411.13564">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Random Forest approach to detect and identify Unlawful Insider Trading </div> <div class='list-authors'><a href="https://arxiv.org/search/q-fin?searchtype=author&query=Neupane,+K">Krishna Neupane</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Griva,+I">Igor Griva</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistical Finance (q-fin.ST)</span>; Machine Learning (cs.LG); Risk Management (q-fin.RM); Trading and Market Microstructure (q-fin.TR) </div> <p class='mathjax'> According to The Exchange Act, 1934 unlawful insider trading is the abuse of access to privileged corporate 
information. While a blurred line between "routine" and "opportunistic" insider trading exists, detection of strategies that insiders mold to maneuver fair market prices to their advantage is an uphill battle for hand-engineered approaches. In the context of detailed high-dimensional financial and trade data that are structurally built by multiple covariates, in this study, we explore, implement and provide detailed comparison to the existing study (Deng et al. (2019)) and independently implement automated end-to-end state-of-the-art methods by integrating principal component analysis to the random forest (PCA-RF) followed by a standalone random forest (RF) with 320 and 3984 randomly selected, semi-manually labeled and normalized transactions from multiple industries. The settings successfully uncover latent structures and detect unlawful insider trading. Among the multiple scenarios, our best-performing model accurately classified 96.43 percent of transactions. Among all transactions, the models find 95.47 percent of lawful as lawful and $98.00$ percent of unlawful as unlawful. Besides, the model makes very few mistakes in classifying lawful as unlawful by missing only 2.00 percent. In addition to the classification task, model generated Gini Impurity based features ranking, our analysis shows ownership and governance related features based on permutation values play important roles. In summary, a simple yet powerful automated end-to-end method relieves labor-intensive activities to redirect resources to enhance rule-making and tracking the uncaptured unlawful insider trading transactions. We emphasize that developed financial and trading features are capable of uncovering fraudulent behaviors. 
</p> </div> </dd> <dt> <a name='item368'>[368]</a> <a href ="/abs/2411.13567" title="Abstract" id="2411.13567"> arXiv:2411.13567 </a> (cross-list from math.ST) [<a href="/pdf/2411.13567" title="Download PDF" id="pdf-2411.13567" aria-labelledby="pdf-2411.13567">pdf</a>, <a href="https://arxiv.org/html/2411.13567v1" title="View HTML" id="html-2411.13567" aria-labelledby="html-2411.13567" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13567" title="Other formats" id="oth-2411.13567" aria-labelledby="oth-2411.13567">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Why the p-norms $p{=}1$, $p{=}2$ and $p{=}\infty$ are so special? An answer based on spatial uniformity </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Pinz%C3%B3n,+C">Carlos Pinzón</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistics Theory (math.ST)</span>; Cryptography and Security (cs.CR); Numerical Analysis (math.NA) </div> <p class='mathjax'> Among all metrics based on p-norms, the Manhattan (p=1), euclidean (p=2) and Chebyshev distances (p=infinity) are the most widely used for their interpretability, simplicity and technical convenience. But these are not the only arguments for the ubiquity of these three p-norms. This article proves that there is a volume-surface correspondence property that is unique to them. More precisely, it is shown that sampling uniformly from the volume of an n-dimensional p-ball and projecting to its surface is equivalent to directly sampling uniformly from its surface if and only if p is 1, 2 or infinity. Sampling algorithms and their implementations in Python are also provided. 
</p> </div> </dd> <dt> <a name='item369'>[369]</a> <a href ="/abs/2411.13577" title="Abstract" id="2411.13577"> arXiv:2411.13577 </a> (cross-list from eess.AS) [<a href="/pdf/2411.13577" title="Download PDF" id="pdf-2411.13577" aria-labelledby="pdf-2411.13577">pdf</a>, <a href="https://arxiv.org/html/2411.13577v1" title="View HTML" id="html-2411.13577" aria-labelledby="html-2411.13577" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13577" title="Other formats" id="oth-2411.13577" aria-labelledby="oth-2411.13577">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> WavChat: A Survey of Spoken Dialogue Models </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Ji,+S">Shengpeng Ji</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+Y">Yifu Chen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Fang,+M">Minghui Fang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zuo,+J">Jialong Zuo</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lu,+J">Jingyu Lu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+H">Hanting Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Jiang,+Z">Ziyue Jiang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhou,+L">Long Zhou</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liu,+S">Shujie Liu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Cheng,+X">Xize Cheng</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yang,+X">Xiaoda Yang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+Z">Zehan Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yang,+Q">Qian Yang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+J">Jian Li</a>, <a 
href="https://arxiv.org/search/eess?searchtype=author&query=Jiang,+Y">Yidi Jiang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=He,+J">Jingzhen He</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chu,+Y">Yunfei Chu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Xu,+J">Jin Xu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhao,+Z">Zhou Zhao</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 60 pages, work in progress </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Audio and Speech Processing (eess.AS)</span>; Computation and Language (cs.CL); Machine Learning (cs.LG); Multimedia (cs.MM); Sound (cs.SD) </div> <p class='mathjax'> Recent advancements in spoken dialogue models, exemplified by systems like GPT-4o, have captured significant attention in the speech domain. Compared to traditional three-tier cascaded spoken dialogue models that comprise speech recognition (ASR), large language models (LLMs), and text-to-speech (TTS), modern spoken dialogue models exhibit greater intelligence. These advanced spoken dialogue models not only comprehend audio, music, and other speech-related features, but also capture stylistic and timbral characteristics in speech. Moreover, they generate high-quality, multi-turn speech responses with low latency, enabling real-time interaction through simultaneous listening and speaking capability. Despite the progress in spoken dialogue systems, there is a lack of comprehensive surveys that systematically organize and analyze these systems and the underlying technologies. To address this, we have first compiled existing spoken dialogue systems in the chronological order and categorized them into the cascaded and end-to-end paradigms. 
We then provide an in-depth overview of the core technologies in spoken dialogue models, covering aspects such as speech representation, training paradigm, streaming, duplex, and interaction capabilities. Each section discusses the limitations of these technologies and outlines considerations for future research. Additionally, we present a thorough review of relevant datasets, evaluation metrics, and benchmarks from the perspectives of training and evaluating spoken dialogue systems. We hope this survey will contribute to advancing both academic research and industrial applications in the field of spoken dialogue systems. The related material is available at <a href="https://github.com/jishengpeng/WavChat" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item370'>[370]</a> <a href ="/abs/2411.13586" title="Abstract" id="2411.13586"> arXiv:2411.13586 </a> (cross-list from q-fin.ST) [<a href="/pdf/2411.13586" title="Download PDF" id="pdf-2411.13586" aria-labelledby="pdf-2411.13586">pdf</a>, <a href="https://arxiv.org/html/2411.13586v1" title="View HTML" id="html-2411.13586" aria-labelledby="html-2411.13586" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13586" title="Other formats" id="oth-2411.13586" aria-labelledby="oth-2411.13586">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Advance Detection Of Bull And Bear Phases In Cryptocurrency Markets </div> <div class='list-authors'><a href="https://arxiv.org/search/q-fin?searchtype=author&query=Arulkumaran,+R">Rahul Arulkumaran</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Kumar,+S">Suyash Kumar</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Tomar,+S">Shikha Tomar</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Gongalla,+M">Manideep Gongalla</a>, <a 
href="https://arxiv.org/search/q-fin?searchtype=author&query=Harshitha">Harshitha</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistical Finance (q-fin.ST)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Cryptocurrencies are highly volatile financial instruments with more and more new retail investors joining the scene with each passing day. Bitcoin has always proved to determine in which way the rest of the cryptocurrency market is headed towards. As of today Bitcoin has a market dominance of close to 50 percent. Bull and bear phases in cryptocurrencies are determined based on the performance of Bitcoin over the 50 Day and 200 Day Moving Averages. The aim of this paper is to foretell the performance of bitcoin in the near future by employing predictive algorithms. This predicted data will then be used to calculate the 50 Day and 200 Day Moving Averages and subsequently plotted to establish the potential bull and bear phases. </p> </div> </dd> <dt> <a name='item371'>[371]</a> <a href ="/abs/2411.13594" title="Abstract" id="2411.13594"> arXiv:2411.13594 </a> (cross-list from q-fin.TR) [<a href="/pdf/2411.13594" title="Download PDF" id="pdf-2411.13594" aria-labelledby="pdf-2411.13594">pdf</a>, <a href="https://arxiv.org/html/2411.13594v1" title="View HTML" id="html-2411.13594" aria-labelledby="html-2411.13594" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13594" title="Other formats" id="oth-2411.13594" aria-labelledby="oth-2411.13594">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> High resolution microprice estimates from limit orderbook data using hyperdimensional vector Tsetlin Machines </div> <div class='list-authors'><a href="https://arxiv.org/search/q-fin?searchtype=author&query=Blakely,+C+D">Christian D. 
Blakely</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Trading and Market Microstructure (q-fin.TR)</span>; Machine Learning (cs.LG); Statistical Finance (q-fin.ST) </div> <p class='mathjax'> We propose an error-correcting model for the microprice, a high-frequency estimator of future prices given higher order information of imbalances in the orderbook. The model takes into account a current microprice estimate given the spread and best bid to ask imbalance, and adjusts the microprice based on recent dynamics of higher price rank imbalances. We introduce a computationally fast estimator using a recently proposed hyperdimensional vector Tsetlin machine framework and demonstrate empirically that this estimator can provide a robust estimate of future prices in the orderbook. </p> </div> </dd> <dt> <a name='item372'>[372]</a> <a href ="/abs/2411.13599" title="Abstract" id="2411.13599"> arXiv:2411.13599 </a> (cross-list from q-fin.ST) [<a href="/pdf/2411.13599" title="Download PDF" id="pdf-2411.13599" aria-labelledby="pdf-2411.13599">pdf</a>, <a href="https://arxiv.org/html/2411.13599v1" title="View HTML" id="html-2411.13599" aria-labelledby="html-2411.13599" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13599" title="Other formats" id="oth-2411.13599" aria-labelledby="oth-2411.13599">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Can ChatGPT Overcome Behavioral Biases in the Financial Sector? 
Classify-and-Rethink: Multi-Step Zero-Shot Reasoning in the Gold Investment </div> <div class='list-authors'><a href="https://arxiv.org/search/q-fin?searchtype=author&query=Liu,+S">Shuoling Liu</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Jia,+G">Gaoguo Jia</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Jiang,+Y">Yuhang Jiang</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Chen,+L">Liyuan Chen</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Yang,+Q">Qiang Yang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistical Finance (q-fin.ST)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Large Language Models (LLMs) have achieved remarkable success recently, displaying exceptional capabilities in creating understandable and organized text. These LLMs have been utilized in diverse fields, such as clinical research, where domain-specific models like Med-Palm have achieved human-level performance. Recently, researchers have employed advanced prompt engineering to enhance the general reasoning ability of LLMs. 
Despite the remarkable success of zero-shot Chain-of-Thoughts (CoT) in solving general reasoning tasks, the potential of these methods is still paid limited attention in the financial reasoning task. To address this issue, we explore multiple prompt strategies and incorporate semantic news information to improve LLMs' performance on financial reasoning tasks. To the best of our knowledge, we are the first to explore this important issue by applying ChatGPT to the gold investment. In this work, our aim is to investigate the financial reasoning capabilities of LLMs and their capacity to generate logical and persuasive investment opinions. We will use ChatGPT, one of the most powerful LLMs recently, and prompt engineering to achieve this goal. Our research will focus on understanding the ability of LLMs in sophisticated analysis and reasoning within the context of investment decision-making. Our study finds that ChatGPT with CoT prompt can provide more explainable predictions and overcome behavioral biases, which is crucial in finance-related tasks and can achieve higher investment returns. 
</p> </div> </dd> <dt> <a name='item373'>[373]</a> <a href ="/abs/2411.13601" title="Abstract" id="2411.13601"> arXiv:2411.13601 </a> (cross-list from stat.CO) [<a href="/pdf/2411.13601" title="Download PDF" id="pdf-2411.13601" aria-labelledby="pdf-2411.13601">pdf</a>, <a href="/format/2411.13601" title="Other formats" id="oth-2411.13601" aria-labelledby="oth-2411.13601">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Error Analysis of Sum-Product Algorithms under Stochastic Rounding </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=de+Oliveira+Castro,+P">Pablo de Oliveira Castro</a> (LI-PaRAD, UVSQ), <a href="https://arxiv.org/search/stat?searchtype=author&query=Arar,+E+E">El-Mehdi El Arar</a> (IRISA, UR), <a href="https://arxiv.org/search/stat?searchtype=author&query=Petit,+E">Eric Petit</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Sohier,+D">Devan Sohier</a> (LI-PaRAD, UVSQ)</div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation (stat.CO)</span>; Data Structures and Algorithms (cs.DS) </div> <p class='mathjax'> The quality of numerical computations can be measured through their forward error, for which finding good error bounds is challenging in general. For several algorithms and using stochastic rounding (SR), probabilistic analysis has been shown to be an effective alternative for obtaining tight error bounds. This analysis considers the distribution of errors and evaluates the algorithm's performance on average. 
Using martingales and the Azuma-Hoeffding inequality, it provides error bounds that are valid with a certain probability and in $\mathcal{O}(\sqrt{n}u)$ instead of deterministic worst-case bounds in $\mathcal{O}(nu)$, where $n$ is the number of operations and $u$ is the unit roundoff. In this paper, we present a general method that automatically constructs a martingale for any computation scheme with multi-linear errors based on additions, subtractions, and multiplications. We apply this generalization to algorithms previously studied with SR, such as pairwise summation and the Horner algorithm, and prove equivalent results. We also analyze a previously unstudied algorithm, Karatsuba polynomial multiplication, which illustrates that the method can handle reused intermediate computations. </p> </div> </dd> <dt> <a name='item374'>[374]</a> <a href ="/abs/2411.13602" title="Abstract" id="2411.13602"> arXiv:2411.13602 </a> (cross-list from eess.IV) [<a href="/pdf/2411.13602" title="Download PDF" id="pdf-2411.13602" aria-labelledby="pdf-2411.13602">pdf</a>, <a href="/format/2411.13602" title="Other formats" id="oth-2411.13602" aria-labelledby="oth-2411.13602">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Large-scale cross-modality pretrained model enhances cardiovascular state estimation and cardiomyopathy detection from electrocardiograms: An AI system development and multi-center validation study </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Ding,+Z">Zhengyao Ding</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Hu,+Y">Yujian Hu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Xu,+Y">Youyao Xu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhao,+C">Chengchen Zhao</a>, <a 
href="https://arxiv.org/search/eess?searchtype=author&query=Li,+Z">Ziyu Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mao,+Y">Yiheng Mao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+H">Haitao Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+Q">Qian Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+J">Jing Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+Y">Yue Chen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+M">Mengjia Chen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+L">Longbo Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chu,+X">Xuesen Chu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Pan,+W">Weichao Pan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liu,+Z">Ziyi Liu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wu,+F">Fei Wu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+H">Hongkun Zhang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+T">Ting Chen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Huang,+Z">Zhengxing Huang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 23 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Cardiovascular diseases (CVDs) present significant challenges for early and accurate diagnosis. While cardiac magnetic resonance imaging (CMR) is the gold standard for assessing cardiac function and diagnosing CVDs, its high cost and technical complexity limit accessibility. 
In contrast, electrocardiography (ECG) offers promise for large-scale early screening. This study introduces CardiacNets, an innovative model that enhances ECG analysis by leveraging the diagnostic strengths of CMR through cross-modal contrastive learning and generative pretraining. CardiacNets serves two primary functions: (1) it evaluates detailed cardiac function indicators and screens for potential CVDs, including coronary artery disease, cardiomyopathy, pericarditis, heart failure and pulmonary hypertension, using ECG input; and (2) it enhances interpretability by generating high-quality CMR images from ECG data. We train and validate the proposed CardiacNets on two large-scale public datasets (the UK Biobank with 41,519 individuals and the MIMIC-IV-ECG comprising 501,172 samples) as well as three private datasets (FAHZU with 410 individuals, SAHZU with 464 individuals, and QPH with 338 individuals), and the findings demonstrate that CardiacNets consistently outperforms traditional ECG-only models, substantially improving screening accuracy. Furthermore, the generated CMR images provide valuable diagnostic support for physicians of all experience levels. This proof-of-concept study highlights how ECG can facilitate cross-modal insights into cardiac function assessment, paving the way for enhanced CVD screening and diagnosis at a population level. 
</p> </div> </dd> <dt> <a name='item375'>[375]</a> <a href ="/abs/2411.13603" title="Abstract" id="2411.13603"> arXiv:2411.13603 </a> (cross-list from q-fin.ST) [<a href="/pdf/2411.13603" title="Download PDF" id="pdf-2411.13603" aria-labelledby="pdf-2411.13603">pdf</a>, <a href="https://arxiv.org/html/2411.13603v1" title="View HTML" id="html-2411.13603" aria-labelledby="html-2411.13603" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13603" title="Other formats" id="oth-2411.13603" aria-labelledby="oth-2411.13603">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Full-History Network Dataset for BTC Asset Decentralization Profiling </div> <div class='list-authors'><a href="https://arxiv.org/search/q-fin?searchtype=author&query=Cheng,+L">Ling Cheng</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Shao,+Q">Qian Shao</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Zeng,+F">Fengzhu Zeng</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Zhu,+F">Feida Zhu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> IEEE BigData 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistical Finance (q-fin.ST)</span>; Social and Information Networks (cs.SI) </div> <p class='mathjax'> Since its advent in 2009, Bitcoin (BTC) has garnered increasing attention from both academia and industry. However, due to the massive transaction volume, no systematic study has quantitatively measured the asset decentralization degree specifically from a network perspective. 
<br>In this paper, by conducting a thorough analysis of the BTC transaction network, we first address the significant gap in the availability of full-history BTC graph and network property dataset, which spans over 15 years from the genesis block (1st March, 2009) to the 845651-th block (29, May 2024). We then present the first systematic investigation to profile BTC's asset decentralization and design several decentralization degrees for quantification. Through extensive experiments, we emphasize the significant role of network properties and our network-based decentralization degree in enhancing Bitcoin analysis. Our findings demonstrate the importance of our comprehensive dataset and analysis in advancing research on Bitcoin's transaction dynamics and decentralization, providing valuable insights into the network's structure and its implications. </p> </div> </dd> <dt> <a name='item376'>[376]</a> <a href ="/abs/2411.13608" title="Abstract" id="2411.13608"> arXiv:2411.13608 </a> (cross-list from stat.AP) [<a href="/pdf/2411.13608" title="Download PDF" id="pdf-2411.13608" aria-labelledby="pdf-2411.13608">pdf</a>, <a href="https://arxiv.org/html/2411.13608v1" title="View HTML" id="html-2411.13608" aria-labelledby="html-2411.13608" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13608" title="Other formats" id="oth-2411.13608" aria-labelledby="oth-2411.13608">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Integrating Dynamic Correlation Shifts and Weighted Benchmarking in Extreme Value Analysis </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Panagoulias,+D+P">Dimitrios P. 
Panagoulias</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Sarmas,+E">Elissaios Sarmas</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Marinakis,+V">Vangelis Marinakis</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Virvou,+M">Maria Virvou</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Tsihrintzis,+G+A">George A. Tsihrintzis</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 33 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Applications (stat.AP)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> This paper presents an innovative approach to Extreme Value Analysis (EVA) by introducing the Extreme Value Dynamic Benchmarking Method (EVDBM). EVDBM integrates extreme value theory to detect extreme events and is coupled with the novel Dynamic Identification of Significant Correlation (DISC)-Thresholding algorithm, which enhances the analysis of key variables under extreme conditions. By integrating return values predicted through EVA into the benchmarking scores, we are able to transform these scores to reflect anticipated conditions more accurately. This provides a more precise picture of how each case is projected to unfold under extreme conditions. As a result, the adjusted scores offer a forward-looking perspective, highlighting potential vulnerabilities and resilience factors for each case in a way that static historical data alone cannot capture. By incorporating both historical and probabilistic elements, the EVDBM algorithm provides a comprehensive benchmarking framework that is adaptable to a range of scenarios and contexts. 
The methodology is applied to real PV data, revealing critical low - production scenarios and significant correlations between variables, which aid in risk management, infrastructure design, and long-term planning, while also allowing for the comparison of different production plants. The flexibility of EVDBM suggests its potential for broader applications in other sectors where decision-making sensitivity is crucial, offering valuable insights to improve outcomes. </p> </div> </dd> <dt> <a name='item377'>[377]</a> <a href ="/abs/2411.13615" title="Abstract" id="2411.13615"> arXiv:2411.13615 </a> (cross-list from q-fin.ST) [<a href="/pdf/2411.13615" title="Download PDF" id="pdf-2411.13615" aria-labelledby="pdf-2411.13615">pdf</a>, <a href="/format/2411.13615" title="Other formats" id="oth-2411.13615" aria-labelledby="oth-2411.13615">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Deep Learning Approach to Predict the Fall [of Price] of Cryptocurrency Long Before its Actual Fall </div> <div class='list-authors'><a href="https://arxiv.org/search/q-fin?searchtype=author&query=Meem,+A+T">Anika Tahsin Meem</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Akter,+M+S">Mst. Shapna Akter</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Depto,+D+S">Deponker Sarker Depto</a>, <a href="https://arxiv.org/search/q-fin?searchtype=author&query=Mahdy,+M">M.R.C. Mahdy</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 22 pages, 3 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistical Finance (q-fin.ST)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> In modern times, the cryptocurrency market is one of the world's most rapidly rising financial markets. 
The cryptocurrency market is regarded as more volatile and illiquid than traditional markets such as equities, foreign exchange, and commodities. The risk of this market creates an uncertain condition among the investors. The purpose of this research is to predict the magnitude of the risk factor of the cryptocurrency market. Risk factor is also called volatility. Our approach will assist people who invest in the cryptocurrency market by overcoming the problems and difficulties they experience. Our approach starts with calculating the risk factor of the cryptocurrency market from the existing parameters. In twenty elements of the cryptocurrency market, the risk factor has been predicted using different machine learning algorithms such as CNN, LSTM, BiLSTM, and GRU. All of the models have been applied to the calculated risk factor parameter. A new model has been developed to predict better than the existing models. Our proposed model gives the highest RMSE value of 1.3229 and the lowest RMSE value of 0.0089. Following our model, it will be easier for investors to trade in complicated and challenging financial assets like Bitcoin, Ethereum, Dogecoin, etc. For the other existing models, the highest RMSE was 14.5092 and the lowest was 0.02769. So, the proposed model performs much better than models with proper generalization. Using our approach, it will be easier for investors to trade in complicated and challenging financial assets like Bitcoin, Ethereum, and Dogecoin. 
</p> </div> </dd> <dt> <a name='item378'>[378]</a> <a href ="/abs/2411.13670" title="Abstract" id="2411.13670"> arXiv:2411.13670 </a> (cross-list from cond-mat.mtrl-sci) [<a href="/pdf/2411.13670" title="Download PDF" id="pdf-2411.13670" aria-labelledby="pdf-2411.13670">pdf</a>, <a href="/format/2411.13670" title="Other formats" id="oth-2411.13670" aria-labelledby="oth-2411.13670">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Graph neural network framework for energy mapping of hybrid monte-carlo molecular dynamics simulations of Medium Entropy Alloys </div> <div class='list-authors'><a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Ehsan,+M+T">Mashaekh Tausif Ehsan</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Zafar,+S">Saifuddin Zafar</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Sarker,+A">Apurba Sarker</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Suvro,+S+D">Sourav Das Suvro</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Hasan,+M+N">Mohammad Nasim Hasan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 28 pages, 9 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Materials Science (cond-mat.mtrl-sci)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Machine learning (ML) methods have drawn significant interest in material design and discovery. Graph neural networks (GNNs), in particular, have demonstrated strong potential for predicting material properties. The present study proposes a graph-based representation for modeling medium-entropy alloys (MEAs). Hybrid Monte-Carlo molecular dynamics (MC/MD) simulations are employed to achieve thermally stable structures across various annealing temperatures in an MEA. 
These simulations generate dump files and potential energy labels, which are used to construct graph representations of the atomic configurations. Edges are created between each atom and its 12 nearest neighbors without incorporating explicit edge features. These graphs then serve as input for a Graph Convolutional Neural Network (GCNN) based ML model to predict the system's potential energy. The GCNN architecture effectively captures the local environment and chemical ordering within the MEA structure. The GCNN-based ML model demonstrates strong performance in predicting potential energy at different steps, showing satisfactory results on both the training data and unseen configurations. Our approach presents a graph-based modeling framework for MEAs and high-entropy alloys (HEAs), which effectively captures the local chemical order (LCO) within the alloy structure. This allows us to predict key material properties influenced by LCO in both MEAs and HEAs, providing deeper insights into how atomic-scale arrangements affect the properties of these alloys. 
</p> </div> </dd> <dt> <a name='item379'>[379]</a> <a href ="/abs/2411.13715" title="Abstract" id="2411.13715"> arXiv:2411.13715 </a> (cross-list from physics.optics) [<a href="/pdf/2411.13715" title="Download PDF" id="pdf-2411.13715" aria-labelledby="pdf-2411.13715">pdf</a>, <a href="https://arxiv.org/html/2411.13715v1" title="View HTML" id="html-2411.13715" aria-labelledby="html-2411.13715" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13715" title="Other formats" id="oth-2411.13715" aria-labelledby="oth-2411.13715">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SimPhony: A Device-Circuit-Architecture Cross-Layer Modeling and Simulation Framework for Heterogeneous Electronic-Photonic AI System </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Yin,+Z">Ziang Yin</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Zhang,+M">Meng Zhang</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Begovic,+A">Amir Begovic</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Huang,+R">Rena Huang</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Zhang,+J">Jeff Zhang</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Gu,+J">Jiaqi Gu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7-page </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optics (physics.optics)</span>; Artificial Intelligence (cs.AI); Hardware Architecture (cs.AR); Emerging Technologies (cs.ET); Machine Learning (cs.LG) </div> <p class='mathjax'> Electronic-photonic integrated circuits (EPICs) offer transformative potential for next-generation high-performance AI but require interdisciplinary advances across devices, circuits, architecture, and design automation. 
The complexity of hybrid systems makes it challenging even for domain experts to understand distinct behaviors and interactions across the design stack. The lack of a flexible, accurate, fast, and easy-to-use EPIC AI system simulation framework significantly limits the exploration of hardware innovations and system evaluations on common benchmarks. To address this gap, we propose SimPhony, a cross-layer modeling and simulation framework for heterogeneous electronic-photonic AI systems. SimPhony offers a platform that enables (1) generic, extensible hardware topology representation that supports heterogeneous multi-core architectures with diverse photonic tensor core designs; (2) optics-specific dataflow modeling with unique multi-dimensional parallelism and reuse beyond spatial/temporal dimensions; (3) data-aware energy modeling with realistic device responses, layout-aware area estimation, link budget analysis, and bandwidth-adaptive memory modeling; and (4) seamless integration with model training frameworks for hardware/software co-simulation. By providing a unified, versatile, and high-fidelity simulation platform, SimPhony enables researchers to innovate and evaluate EPIC AI hardware across multiple domains, facilitating the next leap in emerging AI hardware. 
We open-source our codes at <a href="https://github.com/ScopeX-ASU/SimPhony" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item380'>[380]</a> <a href ="/abs/2411.13742" title="Abstract" id="2411.13742"> arXiv:2411.13742 </a> (cross-list from quant-ph) [<a href="/pdf/2411.13742" title="Download PDF" id="pdf-2411.13742" aria-labelledby="pdf-2411.13742">pdf</a>, <a href="https://arxiv.org/html/2411.13742v1" title="View HTML" id="html-2411.13742" aria-labelledby="html-2411.13742" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13742" title="Other formats" id="oth-2411.13742" aria-labelledby="oth-2411.13742">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Benchmarking a wide range of optimisers for solving the Fermi-Hubbard model using the variational quantum eigensolver </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Jones,+B+D">Benjamin D.M. Jones</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Mineh,+L">Lana Mineh</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Montanaro,+A">Ashley Montanaro</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 42 pages, 30 figures. Associated data can be found at <a href="https://doi.org/10.5281/zenodo.13960674" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Machine Learning (cs.LG); Neural and Evolutionary Computing (cs.NE) </div> <p class='mathjax'> We numerically benchmark 30 optimisers on 372 instances of the variational quantum eigensolver for solving the Fermi-Hubbard system with the Hamiltonian variational ansatz. 
We rank the optimisers with respect to metrics such as final energy achieved and function calls needed to get within a certain tolerance level, and find that the best performing optimisers are variants of gradient descent such as Momentum and ADAM (using finite difference), SPSA, CMAES, and BayesMGD. We also perform gradient analysis and observe that the step size for finite difference has a very significant impact. We also consider using simultaneous perturbation (inspired by SPSA) as a gradient subroutine: here finite difference can lead to a more precise estimate of the ground state but uses more calls, whereas simultaneous perturbation can converge quicker but may be less precise in the later stages. Finally, we also study the quantum natural gradient algorithm: we implement this method for 1-dimensional Fermi-Hubbard systems, and find that whilst it can reach a lower energy with fewer iterations, this improvement is typically lost when taking total function calls into account. Our method involves performing careful hyperparameter sweeping on 4 instances. We present a variety of analysis and figures, detailed optimiser notes, and discuss future directions. 
</p> </div> </dd> <dt> <a name='item381'>[381]</a> <a href ="/abs/2411.13765" title="Abstract" id="2411.13765"> arXiv:2411.13765 </a> (cross-list from math.PR) [<a href="/pdf/2411.13765" title="Download PDF" id="pdf-2411.13765" aria-labelledby="pdf-2411.13765">pdf</a>, <a href="https://arxiv.org/html/2411.13765v1" title="View HTML" id="html-2411.13765" aria-labelledby="html-2411.13765" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13765" title="Other formats" id="oth-2411.13765" aria-labelledby="oth-2411.13765">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Schrödinger Bridge Problem for Jump Diffusions </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Zlotchevski,+A">Andrei Zlotchevski</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Chen,+L">Linan Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Probability (math.PR)</span>; Information Theory (cs.IT); Optimization and Control (math.OC) </div> <p class='mathjax'> The Schrödinger bridge problem (SBP) seeks to find the measure $\hat{\mathbf{P}}$ on a certain path space which interpolates between state-space distributions $\rho_0$ at time $0$ and $\rho_T$ at time $T$ while minimizing the KL divergence (relative entropy) to a reference path measure $\mathbf{R}$. In this work, we tackle the SBP in the case when $\mathbf{R}$ is the path measure of a jump diffusion. Under mild assumptions, with both the operator theory approach and the stochastic calculus techniques, we establish an $h$-transform theory for jump diffusions and devise an approximation method to achieve the jump-diffusion SBP solution $\hat{\mathbf{P}}$ as the strong-convergence limit of a sequence of harmonic $h$-transforms. To the best of our knowledge, these results are novel in the study of SBP. 
Moreover, the $h$-transform framework and the approximation method developed in this work are robust and applicable to a relatively general class of jump diffusions. In addition, we examine the SBP of particular types of jump diffusions under additional regularity conditions and extend the existing results on the SBP from the diffusion case to the jump-diffusion setting. </p> </div> </dd> <dt> <a name='item382'>[382]</a> <a href ="/abs/2411.13815" title="Abstract" id="2411.13815"> arXiv:2411.13815 </a> (cross-list from physics.flu-dyn) [<a href="/pdf/2411.13815" title="Download PDF" id="pdf-2411.13815" aria-labelledby="pdf-2411.13815">pdf</a>, <a href="https://arxiv.org/html/2411.13815v1" title="View HTML" id="html-2411.13815" aria-labelledby="html-2411.13815" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13815" title="Other formats" id="oth-2411.13815" aria-labelledby="oth-2411.13815">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FLRNet: A Deep Learning Method for Regressive Reconstruction of Flow Field From Limited Sensor Measurements </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Nguyen,+P+C+H">Phong C. H. Nguyen</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Choi,+J+B">Joseph B. Choi</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Luu,+Q">Quang-Trung Luu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Fluid Dynamics (physics.flu-dyn)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Many applications in computational and experimental fluid mechanics require effective methods for reconstructing the flow fields from limited sensor data. 
However, this task remains a significant challenge because the measurement operator, which provides the punctual sensor measurement for a given state of the flow field, is often ill-conditioned and non-invertible. This issue impedes the feasibility of identifying the forward map, theoretically the inverse of the measurement operator, for field reconstruction purposes. While data-driven methods are available, their generalizability across different flow conditions (e.g., different Reynolds numbers) remains in question. Moreover, they frequently face the problem of spectral bias, which leads to smooth and blurry reconstructed fields, thereby decreasing the accuracy of reconstruction. We introduce FLRNet, a deep learning method for flow field reconstruction from sparse sensor measurements. FLRNet employs a variational autoencoder with Fourier feature layers and incorporates an extra perceptual loss term during training to learn a rich, low-dimensional latent representation of the flow field. The learned latent representation is then correlated to the sensor measurement using a fully connected (dense) network. We validated the reconstruction capability and the generalizability of FLRNet under various fluid flow conditions and sensor configurations, including different sensor counts and sensor layouts. Numerical experiments show that in all tested scenarios, FLRNet consistently outperformed other baselines, delivering the most accurate reconstructed flow field and being the most robust to noise. 
</p> </div> </dd> <dt> <a name='item383'>[383]</a> <a href ="/abs/2411.13855" title="Abstract" id="2411.13855"> arXiv:2411.13855 </a> (cross-list from eess.IV) [<a href="/pdf/2411.13855" title="Download PDF" id="pdf-2411.13855" aria-labelledby="pdf-2411.13855">pdf</a>, <a href="https://arxiv.org/html/2411.13855v1" title="View HTML" id="html-2411.13855" aria-labelledby="html-2411.13855" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13855" title="Other formats" id="oth-2411.13855" aria-labelledby="oth-2411.13855">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Multimodal Approach to The Detection and Classification of Skin Diseases </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Yang,+A">Allen Yang</a> (1), <a href="https://arxiv.org/search/eess?searchtype=author&query=Yang,+E">Edward Yang</a> (2), ((1) Mission San Jose High School, Fremont, CA, (2) Yale University, New Haven, CT)</div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> According to PBS, nearly one-third of Americans lack access to primary care services, and another forty percent delay going to avoid medical costs. As a result, many diseases are left undiagnosed and untreated, even if the disease shows many physical symptoms on the skin. With the rise of AI, self-diagnosis and improved disease recognition have become more promising than ever; in spite of that, existing methods suffer from a lack of large-scale patient databases and outdated methods of study, resulting in studies being limited to only a few diseases or modalities. 
This study incorporates readily available and easily accessible patient information via image and text for skin disease classification on a new dataset of 26 skin disease types that includes both skin disease images (37K) and associated patient narratives. Using this dataset, baselines for various image models were established that outperform existing methods. Initially, the ResNet-50 model was only able to achieve an accuracy of 70% but, after various optimization techniques, the accuracy was improved to 80%. In addition, this study proposes a novel fine-tuning strategy for sequence classification Large Language Models (LLMs), Chain of Options, which breaks down a complex reasoning task into intermediate steps at training time instead of inference. With Chain of Options and preliminary disease recommendations from the image model, this method achieves a state-of-the-art accuracy of 91% in diagnosing patient skin disease given just an image of the afflicted area as well as a patient description of the symptoms (such as itchiness or dizziness). Through this research, an earlier diagnosis of skin diseases can occur, and clinicians can work with deep learning models to give a more accurate diagnosis, improving quality of life and saving lives. 
</p> </div> </dd> <dt> <a name='item384'>[384]</a> <a href ="/abs/2411.13862" title="Abstract" id="2411.13862"> arXiv:2411.13862 </a> (cross-list from eess.IV) [<a href="/pdf/2411.13862" title="Download PDF" id="pdf-2411.13862" aria-labelledby="pdf-2411.13862">pdf</a>, <a href="https://arxiv.org/html/2411.13862v1" title="View HTML" id="html-2411.13862" aria-labelledby="html-2411.13862" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13862" title="Other formats" id="oth-2411.13862" aria-labelledby="oth-2411.13862">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Image Compression Using Novel View Synthesis Priors </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Peng,+L">Luyuan Peng</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chitre,+M">Mandar Chitre</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Vishnu,+H">Hari Vishnu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Too,+Y+M">Yuen Min Too</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Kalyan,+B">Bharath Kalyan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mishra,+R">Rajat Mishra</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tan,+S+P">Soo Pieng Tan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Preprint submitted to Ocean Engineering </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV); Robotics (cs.RO) </div> <p class='mathjax'> Real-time visual feedback is essential for tetherless control of remotely operated vehicles, particularly during inspection and manipulation tasks. 
Though acoustic communication is the preferred choice for medium-range communication underwater, its limited bandwidth renders it impractical to transmit images or videos in real-time. To address this, we propose a model-based image compression technique that leverages prior mission information. Our approach employs trained machine-learning based novel view synthesis models, and uses gradient descent optimization to refine latent representations to help generate compressible differences between camera images and rendered images. We evaluate the proposed compression technique using a dataset from an artificial ocean basin, demonstrating superior compression ratios and image quality over existing techniques. Moreover, our method exhibits robustness to introduction of new objects within the scene, highlighting its potential for advancing tetherless remotely operated vehicle operations. </p> </div> </dd> <dt> <a name='item385'>[385]</a> <a href ="/abs/2411.13869" title="Abstract" id="2411.13869"> arXiv:2411.13869 </a> (cross-list from math.OC) [<a href="/pdf/2411.13869" title="Download PDF" id="pdf-2411.13869" aria-labelledby="pdf-2411.13869">pdf</a>, <a href="/format/2411.13869" title="Other formats" id="oth-2411.13869" aria-labelledby="oth-2411.13869">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Topology optimization of periodic lattice structures for specified mechanical properties using machine learning considering member connectivity </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Matsuoka,+T">Tomoya Matsuoka</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Ohsaki,+M">Makoto Ohsaki</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Hayashi,+K">Kazuki Hayashi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Presented at Asian Congress of Structural and Multidisciplinary 
Optimization (ACSMO 2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optimization and Control (math.OC)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> This study proposes a methodology to utilize machine learning (ML) for topology optimization of periodic lattice structures. In particular, we investigate data representation of lattice structures used as input data for ML models to improve the performance of the models, focusing on the filtering process and feature selection. We use the filtering technique to explicitly consider the connectivity of lattice members and perform feature selection to reduce the input data size. In addition, we propose a convolution approach to apply pre-trained models for small structures to structures of larger sizes. The computational cost for obtaining optimal topologies by a heuristic method is reduced by incorporating the prediction of the trained ML model into the optimization process. In the numerical examples, a response prediction model is constructed for a lattice structure of 4x4 units, and topology optimization of 4x4-unit and 8x8-unit structures is performed by simulated annealing assisted by the trained ML model. The example demonstrates that ML models achieve higher accuracy by using the filtered data as input than by solely using the data representing the existence of each member. It is also demonstrated that a small-scale prediction model can be constructed with sufficient accuracy by feature selection. Additionally, the proposed method can find the optimal structure in less computation time than pure simulated annealing. 
</p> </div> </dd> <dt> <a name='item386'>[386]</a> <a href ="/abs/2411.13887" title="Abstract" id="2411.13887"> arXiv:2411.13887 </a> (cross-list from math.AT) [<a href="/pdf/2411.13887" title="Download PDF" id="pdf-2411.13887" aria-labelledby="pdf-2411.13887">pdf</a>, <a href="https://arxiv.org/html/2411.13887v1" title="View HTML" id="html-2411.13887" aria-labelledby="html-2411.13887" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13887" title="Other formats" id="oth-2411.13887" aria-labelledby="oth-2411.13887">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A cohomology-based Gromov-Hausdorff metric approach for quantifying molecular similarity </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Wee,+J">JunJie Wee</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Gong,+X">Xue Gong</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Tuschmann,+W">Wilderich Tuschmann</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Xia,+K">Kelin Xia</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 14 pages, 3 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Algebraic Topology (math.AT)</span>; Materials Science (cond-mat.mtrl-sci); Computational Geometry (cs.CG); Metric Geometry (math.MG); Machine Learning (stat.ML) </div> <p class='mathjax'> We introduce, for the first time, a cohomology-based Gromov-Hausdorff ultrametric method to analyze 1-dimensional and higher-dimensional (co)homology groups, focusing on loops, voids, and higher-dimensional cavity structures in simplicial complexes, to address typical clustering questions arising in molecular data analysis. The Gromov-Hausdorff distance quantifies the dissimilarity between two metric spaces. 
In this framework, molecules are represented as simplicial complexes, and their cohomology vector spaces are computed to capture intrinsic topological invariants encoding loop and cavity structures. These vector spaces are equipped with a suitable distance measure, enabling the computation of the Gromov-Hausdorff ultrametric to evaluate structural dissimilarities. We demonstrate the methodology using organic-inorganic halide perovskite (OIHP) structures. The results highlight the effectiveness of this approach in clustering various molecular structures. By incorporating geometric information, our method provides deeper insights compared to traditional persistent homology techniques. </p> </div> </dd> <dt> <a name='item387'>[387]</a> <a href ="/abs/2411.13903" title="Abstract" id="2411.13903"> arXiv:2411.13903 </a> (cross-list from eess.SP) [<a href="/pdf/2411.13903" title="Download PDF" id="pdf-2411.13903" aria-labelledby="pdf-2411.13903">pdf</a>, <a href="/format/2411.13903" title="Other formats" id="oth-2411.13903" aria-labelledby="oth-2411.13903">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> AmpliNetECG12: A lightweight SoftMax-based relativistic amplitude amplification architecture for 12 lead ECG classification </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Shreya">Shreya Srivastava</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> The urgent need to promptly detect cardiac disorders from 12-lead Electrocardiograms using limited computations is motivated by the heart's fast and complex electrical activity and restricted computational power of portable devices. Timely and precise diagnoses are crucial since delays might significantly impact patient health outcomes. 
This research presents a novel deep-learning architecture that aims to diagnose heart abnormalities quickly and accurately. We devised a new activation function called aSoftMax, designed to improve the visibility of ECG deflections. The proposed activation function is used with a Convolutional Neural Network architecture to include kernel weight sharing across the ECG's various leads. This innovative method thoroughly generalizes the global 12-lead ECG features and minimizes the model's complexity by decreasing the trainable parameters. aSoftMax, combined with the enhanced CNN architecture, yields AmpliNetECG12, with which we obtain an exceptional accuracy of 84% in diagnosing cardiac disorders. AmpliNetECG12 shows outstanding prediction ability when used with the CPSC2018 dataset for arrhythmia classification. The model attains an F1-score of 80.71% and a ROC-AUC score of 96.00%, with 280,000 trainable parameters which signifies the lightweight yet efficient nature of AmpliNetECG12. The stochastic characteristics of aSoftMax, a fundamental element of AmpliNetECG12, improve prediction accuracy and also increase the model's interpretability. This feature enhances comprehension of important ECG segments in different forms of arrhythmias, establishing a new standard of explainable architecture for cardiac disorder classification. 
</p> </div> </dd> <dt> <a name='item388'>[388]</a> <a href ="/abs/2411.13922" title="Abstract" id="2411.13922"> arXiv:2411.13922 </a> (cross-list from stat.ML) [<a href="/pdf/2411.13922" title="Download PDF" id="pdf-2411.13922" aria-labelledby="pdf-2411.13922">pdf</a>, <a href="https://arxiv.org/html/2411.13922v1" title="View HTML" id="html-2411.13922" aria-labelledby="html-2411.13922" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13922" title="Other formats" id="oth-2411.13922" aria-labelledby="oth-2411.13922">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Exponentially Consistent Nonparametric Clustering of Data Streams </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Singh,+B">Bhupender Singh</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Rajagopalan,+A+R">Ananth Ram Rajagopalan</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Bhashyam,+S">Srikrishna Bhashyam</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Information Theory (cs.IT); Machine Learning (cs.LG); Signal Processing (eess.SP) </div> <p class='mathjax'> In this paper, we consider nonparametric clustering of $M$ independent and identically distributed (i.i.d.) data streams generated from unknown distributions. The distributions of the $M$ data streams belong to $K$ underlying distribution clusters. Existing results on exponentially consistent nonparametric clustering algorithms, like single linkage-based (SLINK) clustering and $k$-medoids distribution clustering, assume that the maximum intra-cluster distance ($d_L$) is smaller than the minimum inter-cluster distance ($d_H$). 
First, in the fixed sample size (FSS) setting, we show that exponential consistency can be achieved for SLINK clustering under a less strict assumption, $d_I &lt; d_H$, where $d_I$ is the maximum distance between any two sub-clusters of a cluster that partition the cluster. Note that $d_I &lt; d_L$ in general. Our results show that SLINK is exponentially consistent for a larger class of problems than $k$-medoids distribution clustering. We also identify examples where $k$-medoids clustering is unable to find the true clusters, but SLINK is exponentially consistent. Then, we propose a sequential clustering algorithm, named SLINK-SEQ, based on SLINK and prove that it is also exponentially consistent. Simulation results show that the SLINK-SEQ algorithm requires a smaller expected number of samples than the FSS SLINK algorithm for the same probability of error. </p> </div> </dd> <dt> <a name='item389'>[389]</a> <a href ="/abs/2411.13970" title="Abstract" id="2411.13970"> arXiv:2411.13970 </a> (cross-list from eess.SP) [<a href="/pdf/2411.13970" title="Download PDF" id="pdf-2411.13970" aria-labelledby="pdf-2411.13970">pdf</a>, <a href="https://arxiv.org/html/2411.13970v1" title="View HTML" id="html-2411.13970" aria-labelledby="html-2411.13970" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13970" title="Other formats" id="oth-2411.13970" aria-labelledby="oth-2411.13970">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Movable Antenna-Equipped UAV for Data Collection in Backscatter Sensor Networks: A Deep Reinforcement Learning-based Approach </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Bai,+Y">Yu Bai</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Xie,+B">Boxuan Xie</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhu,+R">Ruifan Zhu</a>, <a 
href="https://arxiv.org/search/eess?searchtype=author&query=Chang,+Z">Zheng Chang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Jantti,+R">Riku Jantti</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Backscatter communication (BC) has become a promising energy-efficient solution for future wireless sensor networks (WSNs). Unmanned aerial vehicles (UAVs) enable flexible data collection from remote backscatter devices (BDs), yet conventional UAVs rely on omni-directional fixed-position antennas (FPAs), limiting channel gain and prolonging data collection time. To address this issue, we consider equipping a UAV with a directional movable antenna (MA) with high directivity and flexibility. The MA enhances channel gain by precisely aiming its main lobe at each BD, focusing transmission power for efficient communication. Our goal is to minimize the total data collection time by jointly optimizing the UAV's trajectory and the MA's orientation. We develop a deep reinforcement learning (DRL)-based strategy using the azimuth angle and distance between the UAV and each BD to simplify the agent's observation space. To ensure stability during training, we adopt the Soft Actor-Critic (SAC) algorithm that balances exploration with reward maximization for efficient and reliable learning. Simulation results demonstrate that our proposed MA-equipped UAV with SAC outperforms both FPA-equipped UAVs and other RL methods, achieving significant reductions in both data collection time and energy consumption. 
</p> </div> </dd> <dt> <a name='item390'>[390]</a> <a href ="/abs/2411.13999" title="Abstract" id="2411.13999"> arXiv:2411.13999 </a> (cross-list from math.OC) [<a href="/pdf/2411.13999" title="Download PDF" id="pdf-2411.13999" aria-labelledby="pdf-2411.13999">pdf</a>, <a href="https://arxiv.org/html/2411.13999v1" title="View HTML" id="html-2411.13999" aria-labelledby="html-2411.13999" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13999" title="Other formats" id="oth-2411.13999" aria-labelledby="oth-2411.13999">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Accelerated zero-order SGD under high-order smoothness and overparameterized regime </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Bychkov,+G">Georgii Bychkov</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Dvinskikh,+D">Darina Dvinskikh</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Antsiferova,+A">Anastasia Antsiferova</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Gasnikov,+A">Alexander Gasnikov</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Lobanov,+A">Aleksandr Lobanov</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages, 1 figure </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optimization and Control (math.OC)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> We present a novel gradient-free algorithm to solve a convex stochastic optimization problem, such as those encountered in medicine, physics, and machine learning (e.g., adversarial multi-armed bandit problem), where the objective function can only be computed through numerical simulation, either as the result of a real experiment or as feedback given by the function evaluations from an adversary. 
Thus we suppose that only a black-box access to the function values of the objective is available, possibly corrupted by adversarial noise: deterministic or stochastic. The noisy setup can arise naturally from modeling randomness within a simulation or by computer discretization, or when exact values of the function are forbidden due to privacy issues, or when solving non-convex problems as convex ones with an inexact function oracle. By exploiting higher-order smoothness, fulfilled, e.g., in logistic regression, we improve the performance of zero-order methods developed under the assumption of classical smoothness (or having a Lipschitz gradient). The proposed algorithm enjoys optimal oracle complexity and is designed under an overparameterization setup, i.e., when the number of model parameters is much larger than the size of the training dataset. Overparameterized models fit the training data perfectly while also having good generalization and outperforming underparameterized models on unseen data. We provide convergence guarantees for the proposed algorithm under both types of noise. Moreover, we estimate the maximum permissible adversarial noise level that maintains the desired accuracy in the Euclidean setup, and then we extend our results to a non-Euclidean setup. Our theoretical results are verified on the logistic regression problem. 
</p> </div> </dd> <dt> <a name='item391'>[391]</a> <a href ="/abs/2411.14013" title="Abstract" id="2411.14013"> arXiv:2411.14013 </a> (cross-list from eess.AS) [<a href="/pdf/2411.14013" title="Download PDF" id="pdf-2411.14013" aria-labelledby="pdf-2411.14013">pdf</a>, <a href="https://arxiv.org/html/2411.14013v1" title="View HTML" id="html-2411.14013" aria-labelledby="html-2411.14013" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14013" title="Other formats" id="oth-2411.14013" aria-labelledby="oth-2411.14013">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Single-Model Attribution for Spoofed Speech via Vocoder Fingerprints in an Open-World Setting </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Pizarro,+M">Matías Pizarro</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Laszkiewicz,+M">Mike Laszkiewicz</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Kolossa,+D">Dorothea Kolossa</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Fischer,+A">Asja Fischer</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Audio and Speech Processing (eess.AS)</span>; Cryptography and Security (cs.CR); Machine Learning (cs.LG) </div> <p class='mathjax'> As speech generation technology advances, so do the potential threats of misusing spoofed speech signals. One way to address these threats is by attributing the signals to their source generative model. In this work, we are the first to tackle the single-model attribution task in an open-world setting, that is, we aim at identifying whether spoofed speech signals from unknown sources originate from a specific vocoder. We show that the standardized average residual between audio signals and their low-pass filtered or EnCodec filtered versions can serve as powerful vocoder fingerprints. 
The approach only requires data from the target vocoder and allows for simple but highly accurate distance-based model attribution. We demonstrate its effectiveness on LJSpeech and JSUT, achieving an average AUROC of over 99% in most settings. The accompanying robustness study shows that it is also resilient to noise levels up to a certain degree. </p> </div> </dd> <dt> <a name='item392'>[392]</a> <a href ="/abs/2411.14017" title="Abstract" id="2411.14017"> arXiv:2411.14017 </a> (cross-list from eess.IV) [<a href="/pdf/2411.14017" title="Download PDF" id="pdf-2411.14017" aria-labelledby="pdf-2411.14017">pdf</a>, <a href="https://arxiv.org/html/2411.14017v1" title="View HTML" id="html-2411.14017" aria-labelledby="html-2411.14017" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14017" title="Other formats" id="oth-2411.14017" aria-labelledby="oth-2411.14017">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Automatic brain tumor segmentation in 2D intra-operative ultrasound images using MRI tumor annotations </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Faanes,+M">Mathilde Faanes</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Helland,+R+H">Ragnhild Holden Helland</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Solheim,+O">Ole Solheim</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Reinertsen,+I">Ingerid Reinertsen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 19, 8 figures, submitted to International Journal of Computer Assisted Radiology and Surgery </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Automatic segmentation of brain 
tumors in intra-operative ultrasound (iUS) images could facilitate localization of tumor tissue during resection surgery. The lack of large annotated datasets limits the performance of current models. In this paper, we investigate the use of tumor annotations in pre-operative MRI images, which are more easily accessible than annotations in iUS images, for training of deep learning models for iUS brain tumor segmentation. We used 180 annotated pre-operative MRI images with corresponding unannotated iUS images, and 29 annotated iUS images. Image registration was performed to transfer the MRI annotations to the corresponding iUS images before training models with the nnU-Net framework. To validate the use of MRI labels, the models were compared to a model trained with only US annotated tumors, and a model with both US and MRI annotated tumors. In addition, the results were compared to annotations validated by an expert neurosurgeon on the same test set to measure inter-observer variability. The results showed similar performance for a model trained with only MRI annotated tumors, compared to a model trained with only US annotated tumors. The model trained using both modalities obtained slightly better results with an average Dice score of 0.62, where external expert annotations achieved a score of 0.67. The results also showed that the deep learning models were comparable to expert annotation for larger tumors (&gt; 200 mm2), but performed clearly worse for smaller tumors (&lt; 200 mm2). This shows that MRI tumor annotations can be used as a substitute for US tumor annotations to train a deep learning model for automatic brain tumor segmentation in intra-operative ultrasound images. Small tumors are a limitation for the current models and will be the focus of future work. The main models are available here: <a href="https://github.com/mathildefaanes/us_brain_tumor_segmentation" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item393'>[393]</a> <a href ="/abs/2411.14031" title="Abstract" id="2411.14031"> arXiv:2411.14031 </a> (cross-list from math.OC) [<a href="/pdf/2411.14031" title="Download PDF" id="pdf-2411.14031" aria-labelledby="pdf-2411.14031">pdf</a>, <a href="https://arxiv.org/html/2411.14031v1" title="View HTML" id="html-2411.14031" aria-labelledby="html-2411.14031" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14031" title="Other formats" id="oth-2411.14031" aria-labelledby="oth-2411.14031">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Numerical null controllability of parabolic PDEs using Lagrangian methods </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Fernandez-Cara,+E">Enrique Fernandez-Cara</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Morales,+R">Roberto Morales</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Souza,+D+A">Diego A. Souza</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optimization and Control (math.OC)</span>; Analysis of PDEs (math.AP); Numerical Analysis (math.NA) </div> <p class='mathjax'> In this paper, we study several theoretical and numerical questions concerning the null controllability problems for linear parabolic equations and systems for several dimensions. The control is distributed and acts on a small subset of the domain. The main goal is to compute numerically a control that drives a numerical approximation of the state from prescribed initial data exactly to zero. We introduce a methodology for solving numerical controllability problems that is new in some sense. 
The main idea is to apply classical Lagrangian and Augmented Lagrangian techniques to suitable constrained extremal formulations that involve unbounded weights in time that make global Carleman inequalities possible. The theoretical results are validated by satisfactory numerical experiments for spatially 2D and 3D problems. </p> </div> </dd> <dt> <a name='item394'>[394]</a> <a href ="/abs/2411.14034" title="Abstract" id="2411.14034"> arXiv:2411.14034 </a> (cross-list from cond-mat.mtrl-sci) [<a href="/pdf/2411.14034" title="Download PDF" id="pdf-2411.14034" aria-labelledby="pdf-2411.14034">pdf</a>, <a href="https://arxiv.org/html/2411.14034v1" title="View HTML" id="html-2411.14034" aria-labelledby="html-2411.14034" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14034" title="Other formats" id="oth-2411.14034" aria-labelledby="oth-2411.14034">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Assessing data-driven predictions of band gap and electrical conductivity for transparent conducting materials </div> <div class='list-authors'><a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Ottomano,+F">Federico Ottomano</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Goulermas,+J+Y">John Y. Goulermas</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Gusev,+V">Vladimir Gusev</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Savani,+R">Rahul Savani</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Gaultois,+M+W">Michael W. Gaultois</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Manning,+T+D">Troy D. Manning</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Lin,+H">Hai Lin</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Manzanera,+T+P">Teresa P. 
Manzanera</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Poole,+E+G">Emmeline G. Poole</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Dyer,+M+S">Matthew S. Dyer</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Claridge,+J+B">John B. Claridge</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Alaria,+J">Jon Alaria</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Daniels,+L+M">Luke M. Daniels</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Varma,+S">Su Varma</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Rimmer,+D">David Rimmer</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Sanderson,+K">Kevin Sanderson</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Rosseinsky,+M+J">Matthew J. Rosseinsky</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Materials Science (cond-mat.mtrl-sci)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Machine Learning (ML) has offered innovative perspectives for accelerating the discovery of new functional materials, leveraging the increasing availability of material databases. Despite the promising advances, data-driven methods face constraints imposed by the quantity and quality of available data. Moreover, ML is often employed in tandem with simulated datasets originating from density functional theory (DFT), and assessed through in-sample evaluation schemes. This scenario raises questions about the practical utility of ML in uncovering new and significant material classes for industrial applications. Here, we propose a data-driven framework aimed at accelerating the discovery of new transparent conducting materials (TCMs), an important category of semiconductors with a wide range of applications. 
To mitigate the shortage of available data, we create and validate unique experimental databases, comprising several examples of existing TCMs. We assess state-of-the-art (SOTA) ML models for property prediction from the stoichiometry alone. We propose a bespoke evaluation scheme to provide empirical evidence on the ability of ML to uncover new, previously unseen materials of interest. We test our approach on a list of 55 compositions containing typical elements of known TCMs. Although our study indicates that ML tends to identify new TCMs compositionally similar to those in the training data, we empirically demonstrate that it can highlight material candidates that may have been previously overlooked, offering a systematic approach to identify materials that are likely to display TCMs characteristics. </p> </div> </dd> <dt> <a name='item395'>[395]</a> <a href ="/abs/2411.14057" title="Abstract" id="2411.14057"> arXiv:2411.14057 </a> (cross-list from math.CO) [<a href="/pdf/2411.14057" title="Download PDF" id="pdf-2411.14057" aria-labelledby="pdf-2411.14057">pdf</a>, <a href="https://arxiv.org/html/2411.14057v1" title="View HTML" id="html-2411.14057" aria-labelledby="html-2411.14057" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14057" title="Other formats" id="oth-2411.14057" aria-labelledby="oth-2411.14057">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Characterizing and Transforming DAGs within the I-LCA Framework </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Hellmuth,+M">Marc Hellmuth</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Lindeberg,+A">Anna Lindeberg</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages, 3 figures. 
arXiv admin note: text overlap with <a href="https://arxiv.org/abs/2411.00708" data-arxiv-id="2411.00708" class="link-https">arXiv:2411.00708</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Combinatorics (math.CO)</span>; Discrete Mathematics (cs.DM) </div> <p class='mathjax'> We explore the connections between clusters and least common ancestors (LCAs) in directed acyclic graphs (DAGs), focusing on DAGs with unique LCAs for specific subsets of their leaves. These DAGs are important in modeling phylogenetic networks that account for reticulate processes or horizontal gene transfer. Phylogenetic DAGs inferred from genomic data are often complex, obscuring evolutionary insights, especially when vertices lack support as LCAs for any subset of taxa. To address this, we focus on $I$-lca-relevant DAGs, where each vertex serves as the unique LCA for a subset $A$ of leaves of specific size $|A|\in I$. We characterize DAGs with the so-called $I$-lca-property and establish their close relationship to pre-$I$-ary and $I$-ary set systems. Moreover, we build upon recently established results that use a simple operator $\ominus$, enabling the transformation of arbitrary DAGs into $I$-lca-relevant DAGs. This process reduces unnecessary complexity while preserving the key structural properties of the original DAG. The set $C_G$ consists of all clusters in a DAG $G$, where clusters correspond to the descendant leaves of vertices. While in some cases $C_H = C_G$ when transforming $G$ into an $I$-lca-relevant DAG $H$, it often happens that certain clusters in $C_G$ do not appear as clusters in $H$. To understand this phenomenon in detail, we characterize the subset of clusters in $C_G$ that remain in $H$ for DAGs $G$ with the $I$-lca-property. Furthermore, we show that the set $W$ of vertices required to transform $G$ into $H = G \ominus W$ is uniquely determined for such DAGs. 
This, in turn, allows us to show that the transformed DAG $H$ is always a tree or a galled-tree whenever $C_G$ represents the clustering system of a tree or galled-tree and $G$ has the $I$-lca-property. In the latter case $C_H = C_G$ always holds. </p> </div> </dd> <dt> <a name='item396'>[396]</a> <a href ="/abs/2411.14078" title="Abstract" id="2411.14078"> arXiv:2411.14078 </a> (cross-list from astro-ph.IM) [<a href="/pdf/2411.14078" title="Download PDF" id="pdf-2411.14078" aria-labelledby="pdf-2411.14078">pdf</a>, <a href="https://arxiv.org/html/2411.14078v1" title="View HTML" id="html-2411.14078" aria-labelledby="html-2411.14078" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14078" title="Other formats" id="oth-2411.14078" aria-labelledby="oth-2411.14078">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Self-supervised learning for radio-astronomy source classification: a benchmark </div> <div class='list-authors'><a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Cecconello,+T">Thomas Cecconello</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Riggi,+S">Simone Riggi</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Becciano,+U">Ugo Becciano</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Vitello,+F">Fabio Vitello</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Hopkins,+A+M">Andrew M. 
Hopkins</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Vizzari,+G">Giuseppe Vizzari</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Spampinato,+C">Concetto Spampinato</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Palazzo,+S">Simone Palazzo</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Instrumentation and Methods for Astrophysics (astro-ph.IM)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> The upcoming Square Kilometer Array (SKA) telescope marks a significant step forward in radio astronomy, presenting new opportunities and challenges for data analysis. Traditional visual models pretrained on optical photography images may not perform optimally on radio interferometry images, which have distinct visual characteristics. <br>Self-Supervised Learning (SSL) offers a promising approach to address this issue, leveraging the abundant unlabeled data in radio astronomy to train neural networks that learn useful representations from radio images. This study explores the application of SSL to radio astronomy, comparing the performance of SSL-trained models with that of traditional models pretrained on natural images, evaluating the importance of data curation for SSL, and assessing the potential benefits of self-supervision to different domain-specific radio astronomy datasets. <br>Our results indicate that, SSL-trained models achieve significant improvements over the baseline in several downstream tasks, especially in the linear evaluation setting; when the entire backbone is fine-tuned, the benefits of SSL are less evident but still outperform pretraining. These findings suggest that SSL can play a valuable role in efficiently enhancing the analysis of radio astronomical data. 
The trained models and code are available at: <a href="https://github.com/dr4thmos/solo-learn-radio" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item397'>[397]</a> <a href ="/abs/2411.14093" title="Abstract" id="2411.14093"> arXiv:2411.14093 </a> (cross-list from math.OC) [<a href="/pdf/2411.14093" title="Download PDF" id="pdf-2411.14093" aria-labelledby="pdf-2411.14093">pdf</a>, <a href="https://arxiv.org/html/2411.14093v1" title="View HTML" id="html-2411.14093" aria-labelledby="html-2411.14093" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14093" title="Other formats" id="oth-2411.14093" aria-labelledby="oth-2411.14093">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Desingularization of bounded-rank tensor sets </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Gao,+B">Bin Gao</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Peng,+R">Renfeng Peng</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Yuan,+Y">Ya-xiang Yuan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 41 pages, 10 figures, 1 table </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optimization and Control (math.OC)</span>; Algebraic Geometry (math.AG); Numerical Analysis (math.NA) </div> <p class='mathjax'> Low-rank tensors appear to be prosperous in many applications. However, the sets of bounded-rank tensors are non-smooth and non-convex algebraic varieties, rendering the low-rank optimization problems to be challenging. To this end, we delve into the geometry of bounded-rank tensor sets, including Tucker and tensor train formats. 
We propose a desingularization approach for bounded-rank tensor sets by introducing slack variables, resulting in a low-dimensional smooth manifold embedded in a higher-dimensional space while preserving the structure of low-rank tensor formats. Subsequently, optimization on tensor varieties can be reformulated to optimization on smooth manifolds, where the methods and convergence are well explored. We reveal the relationship between the landscape of optimization on varieties and that of optimization on manifolds. Numerical experiments on tensor completion illustrate that the proposed methods are in favor of others under different rank parameters. </p> </div> </dd> <dt> <a name='item398'>[398]</a> <a href ="/abs/2411.14100" title="Abstract" id="2411.14100"> arXiv:2411.14100 </a> (cross-list from eess.AS) [<a href="/pdf/2411.14100" title="Download PDF" id="pdf-2411.14100" aria-labelledby="pdf-2411.14100">pdf</a>, <a href="https://arxiv.org/html/2411.14100v1" title="View HTML" id="html-2411.14100" aria-labelledby="html-2411.14100" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14100" title="Other formats" id="oth-2411.14100" aria-labelledby="oth-2411.14100">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> BEST-STD: Bidirectional Mamba-Enhanced Speech Tokenization for Spoken Term Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Singh,+A">Anup Singh</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Demuynck,+K">Kris Demuynck</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Arora,+V">Vipul Arora</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to ICASSP 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Audio and Speech Processing (eess.AS)</span>; Computation and Language (cs.CL); 
Information Retrieval (cs.IR) </div> <p class='mathjax'> Spoken term detection (STD) is often hindered by reliance on frame-level features and the computationally intensive DTW-based template matching, limiting its practicality. To address these challenges, we propose a novel approach that encodes speech into discrete, speaker-agnostic semantic tokens. This facilitates fast retrieval using text-based search algorithms and effectively handles out-of-vocabulary terms. Our approach focuses on generating consistent token sequences across varying utterances of the same term. We also propose a bidirectional state space modeling within the Mamba encoder, trained in a self-supervised learning framework, to learn contextual frame-level features that are further encoded into discrete tokens. Our analysis shows that our speech tokens exhibit greater speaker invariance than those from existing tokenizers, making them more suitable for STD tasks. Empirical evaluation on LibriSpeech and TIMIT databases indicates that our method outperforms existing STD baselines while being more efficient. 
</p> </div> </dd> <dt> <a name='item399'>[399]</a> <a href ="/abs/2411.14106" title="Abstract" id="2411.14106"> arXiv:2411.14106 </a> (cross-list from physics.ao-ph) [<a href="/pdf/2411.14106" title="Download PDF" id="pdf-2411.14106" aria-labelledby="pdf-2411.14106">pdf</a>, <a href="https://arxiv.org/html/2411.14106v1" title="View HTML" id="html-2411.14106" aria-labelledby="html-2411.14106" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14106" title="Other formats" id="oth-2411.14106" aria-labelledby="oth-2411.14106">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Adjoint-based online learning of two-layer quasi-geostrophic baroclinic turbulence </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Yan,+F+E">Fei Er Yan</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Frezat,+H">Hugo Frezat</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Sommer,+J+L">Julien Le Sommer</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Mak,+J">Julian Mak</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Otness,+K">Karl Otness</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 25 pages, 1 table, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Atmospheric and Oceanic Physics (physics.ao-ph)</span>; Machine Learning (cs.LG); Fluid Dynamics (physics.flu-dyn) </div> <p class='mathjax'> For reasons of computational constraint, most global ocean circulation models used for Earth System Modeling still rely on parameterizations of sub-grid processes, and limitations in these parameterizations affect the modeled ocean circulation and impact on predictive skill. 
An increasingly popular approach is to leverage machine learning approaches for parameterizations, regressing for a map between the resolved state and missing feedbacks in a fluid system as a supervised learning task. However, the learning is often performed in an `offline' fashion, without involving the underlying fluid dynamical model during the training stage. Here, we explore the `online' approach that involves the fluid dynamical model during the training stage for the learning of baroclinic turbulence and its parameterization, with reference to ocean eddy parameterization. Two online approaches are considered: a full adjoint-based online approach, related to traditional adjoint optimization approaches that require a `differentiable' dynamical model, and an approximately online approach that approximates the adjoint calculation and does not require a differentiable dynamical model. The online approaches are found to be generally more skillful and numerically stable than offline approaches. Other details relating to online training, such as window size, machine learning model set up and designs of the loss functions are detailed to aid in further explorations of the online training methodology for Earth System Modeling. 
</p> </div> </dd> <dt> <a name='item400'>[400]</a> <a href ="/abs/2411.14135" title="Abstract" id="2411.14135"> arXiv:2411.14135 </a> (cross-list from eess.IV) [<a href="/pdf/2411.14135" title="Download PDF" id="pdf-2411.14135" aria-labelledby="pdf-2411.14135">pdf</a>, <a href="https://arxiv.org/html/2411.14135v1" title="View HTML" id="html-2411.14135" aria-labelledby="html-2411.14135" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14135" title="Other formats" id="oth-2411.14135" aria-labelledby="oth-2411.14135">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Compact Visual Data Representation for Green Multimedia -- A Human Visual System Perspective </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+P">Peilin Chen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Fang,+X">Xiaohan Fang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+M">Meng Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+S">Shiqi Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Ma,+S">Siwei Ma</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Multimedia (cs.MM) </div> <p class='mathjax'> The Human Visual System (HVS), with its intricate sophistication, is capable of achieving ultra-compact information compression for visual signals. This remarkable ability is coupled with high generalization capability and energy efficiency. By contrast, the state-of-the-art Versatile Video Coding (VVC) standard achieves a compression ratio of around 1,000 times for raw visual data. This notable disparity motivates the research community to draw inspiration to effectively handle the immense volume of visual data in a green way. 
Therefore, this paper provides a survey of how visual data can be efficiently represented for green multimedia, in particular when the ultimate task is knowledge extraction instead of visual signal reconstruction. We introduce recent research efforts that promote green, sustainable, and efficient multimedia in this field. Moreover, we discuss how the deep understanding of the HVS can benefit the research community, and envision the development of future green multimedia technologies. </p> </div> </dd> <dt> <a name='item401'>[401]</a> <a href ="/abs/2411.14143" title="Abstract" id="2411.14143"> arXiv:2411.14143 </a> (cross-list from math.CT) [<a href="/pdf/2411.14143" title="Download PDF" id="pdf-2411.14143" aria-labelledby="pdf-2411.14143">pdf</a>, <a href="https://arxiv.org/html/2411.14143v1" title="View HTML" id="html-2411.14143" aria-labelledby="html-2411.14143" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14143" title="Other formats" id="oth-2411.14143" aria-labelledby="oth-2411.14143">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Volume preservation of Butcher series methods from the operad viewpoint </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Dotsenko,+V">Vladimir Dotsenko</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Laubie,+P">Paul Laubie</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 25 pages, comments are welcome </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Category Theory (math.CT)</span>; K-Theory and Homology (math.KT); Numerical Analysis (math.NA); Quantum Algebra (math.QA) </div> <p class='mathjax'> We study a coloured operad involving rooted trees and directed cycles of rooted trees that generalizes the operad of rooted trees of Chapoton and Livernet. 
We describe all the relations between the generators of a certain suboperad of that operad, and compute the Chevalley-Eilenberg homology of two naturally arising differential graded Lie algebras. This allows us to give short and conceptual new proofs of two important results on Butcher series methods of numerical solution of ODEs: absence of volume-preserving integration schemes and the acyclicity of the aromatic bicomplex, the key step in a complete classification of volume-preserving integration schemes using the so called aromatic Butcher series. </p> </div> </dd> <dt> <a name='item402'>[402]</a> <a href ="/abs/2411.14157" title="Abstract" id="2411.14157"> arXiv:2411.14157 </a> (cross-list from q-bio.QM) [<a href="/pdf/2411.14157" title="Download PDF" id="pdf-2411.14157" aria-labelledby="pdf-2411.14157">pdf</a>, <a href="https://arxiv.org/html/2411.14157v1" title="View HTML" id="html-2411.14157" aria-labelledby="html-2411.14157" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14157" title="Other formats" id="oth-2411.14157" aria-labelledby="oth-2411.14157">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DrugGen: Advancing Drug Discovery with Large Language Models and Reinforcement Learning Feedback </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Sheikholeslami,+M">Mahsa Sheikholeslami</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Mazrouei,+N">Navid Mazrouei</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Gheisari,+Y">Yousof Gheisari</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Fasihi,+A">Afshin Fasihi</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Irajpour,+M">Matin Irajpour</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Motahharynia,+A">Ali Motahharynia</a></div> <div class='list-comments mathjax'><span 
class='descriptor'>Comments:</span> 20 pages, 5 figures, 3 tables, and 7 supplementary files. To use the model, see <a href="https://huggingface.co/alimotahharynia/DrugGen" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantitative Methods (q-bio.QM)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Traditional drug design faces significant challenges due to inherent chemical and biological complexities, often resulting in high failure rates in clinical trials. Deep learning advancements, particularly generative models, offer potential solutions to these challenges. One promising algorithm is DrugGPT, a transformer-based model, that generates small molecules for input protein sequences. Although promising, it generates both chemically valid and invalid structures and does not incorporate the features of approved drugs, resulting in time-consuming and inefficient drug discovery. To address these issues, we introduce DrugGen, an enhanced model based on the DrugGPT structure. DrugGen is fine-tuned on approved drug-target interactions and optimized with proximal policy optimization. By giving reward feedback from protein-ligand binding affinity prediction using pre-trained transformers (PLAPT) and a customized invalid structure assessor, DrugGen significantly improves performance. Evaluation across multiple targets demonstrated that DrugGen achieves 100% valid structure generation compared to 95.5% with DrugGPT and produced molecules with higher predicted binding affinities (7.22 [6.30-8.07]) compared to DrugGPT (5.81 [4.97-6.63]) while maintaining diversity and novelty. Docking simulations further validate its ability to generate molecules targeting binding sites effectively. 
For example, in the case of fatty acid-binding protein 5 (FABP5), DrugGen generated molecules with superior docking scores (FABP5/11, -9.537 and FABP5/5, -8.399) compared to the reference molecule (Palmitic acid, -6.177). Beyond lead compound generation, DrugGen also shows potential for drug repositioning and creating novel pharmacophores for existing targets. By producing high-quality small molecules, DrugGen provides a high-performance medium for advancing pharmaceutical research and drug discovery. </p> </div> </dd> <dt> <a name='item403'>[403]</a> <a href ="/abs/2411.14166" title="Abstract" id="2411.14166"> arXiv:2411.14166 </a> (cross-list from math.OC) [<a href="/pdf/2411.14166" title="Download PDF" id="pdf-2411.14166" aria-labelledby="pdf-2411.14166">pdf</a>, <a href="/format/2411.14166" title="Other formats" id="oth-2411.14166" aria-labelledby="oth-2411.14166">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SPARKLE: A Unified Single-Loop Primal-Dual Framework for Decentralized Bilevel Optimization </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Zhu,+S">Shuchen Zhu</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Kong,+B">Boao Kong</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Lu,+S">Songtao Lu</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Huang,+X">Xinmeng Huang</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Yuan,+K">Kun Yuan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 73 pages, the Thirty-Eighth Annual Conference on Neural Information Processing Systems (2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optimization and Control (math.OC)</span>; Machine Learning (cs.LG); Machine Learning (stat.ML) </div> <p class='mathjax'> This paper studies decentralized 
bilevel optimization, in which multiple agents collaborate to solve problems involving nested optimization structures with neighborhood communications. Most existing literature primarily utilizes gradient tracking to mitigate the influence of data heterogeneity, without exploring other well-known heterogeneity-correction techniques such as EXTRA or Exact Diffusion. Additionally, these studies often employ identical decentralized strategies for both upper- and lower-level problems, neglecting to leverage distinct mechanisms across different levels. To address these limitations, this paper proposes SPARKLE, a unified Single-loop Primal-dual AlgoRithm frameworK for decentraLized bilEvel optimization. SPARKLE offers the flexibility to incorporate various heterogeneity-correction strategies into the algorithm. Moreover, SPARKLE allows for different strategies to solve upper- and lower-level problems. We present a unified convergence analysis for SPARKLE, applicable to all its variants, with state-of-the-art convergence rates compared to existing decentralized bilevel algorithms. Our results further reveal that EXTRA and Exact Diffusion are more suitable for decentralized bilevel optimization, and using mixed strategies in bilevel algorithms brings more benefits than relying solely on gradient tracking. 
</p> </div> </dd> <dt> <a name='item404'>[404]</a> <a href ="/abs/2411.14184" title="Abstract" id="2411.14184"> arXiv:2411.14184 </a> (cross-list from eess.IV) [<a href="/pdf/2411.14184" title="Download PDF" id="pdf-2411.14184" aria-labelledby="pdf-2411.14184">pdf</a>, <a href="https://arxiv.org/html/2411.14184v1" title="View HTML" id="html-2411.14184" aria-labelledby="html-2411.14184" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14184" title="Other formats" id="oth-2411.14184" aria-labelledby="oth-2411.14184">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Deep Learning Approach for Enhancing Oral Squamous Cell Carcinoma with LIME Explainable AI Technique </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Islam,+S">Samiha Islam</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mahmud,+M+Z">Muhammad Zawad Mahmud</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Alve,+S+R">Shahran Rahman Alve</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chowdhury,+M+M+U">Md. Mejbah Ullah Chowdhury</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Under Review at an IEEE conference </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> The goal of the present study is to analyze an application of deep learning models in order to augment the diagnostic performance of oral squamous cell carcinoma (OSCC) with a longitudinal cohort study using the Histopathological Imaging Database for oral cancer analysis. 
The dataset consisted of 5192 images (2435 Normal and 2511 OSCC), which were allocated between training, testing, and validation sets with an estimated ratio repartition of about 52% for the OSCC group, and still, our performance measure was validated on a combination set that contains an almost equal number of samples in this use case as the entire database has been divided into half using a stratified splitting technique based again near binary proportion but total distribution was around even. We selected four deep-learning architectures for evaluation in the present study: ResNet101, DenseNet121, VGG16, and EfficientNetB3. EfficientNetB3 was found to be the best, with an accuracy of 98.33% and F1 score (0.9844), and it took remarkably less computing power in comparison with other models. The subsequent one was DenseNet121, with 90.24% accuracy and an F1 score of 90.45%. Moreover, we employed the Local Interpretable Model-agnostic Explanations (LIME) method to clarify why EfficientNetB3 made certain decisions with its predictions to improve the explainability and trustworthiness of results. This work provides evidence for the possible superior diagnosis in OSCC activated from the EfficientNetB3 model with the explanation of AI techniques such as LIME and paves an important groundwork to build on towards clinical usage. 
</p> </div> </dd> <dt> <a name='item405'>[405]</a> <a href ="/abs/2411.14192" title="Abstract" id="2411.14192"> arXiv:2411.14192 </a> (cross-list from physics.flu-dyn) [<a href="/pdf/2411.14192" title="Download PDF" id="pdf-2411.14192" aria-labelledby="pdf-2411.14192">pdf</a>, <a href="https://arxiv.org/html/2411.14192v1" title="View HTML" id="html-2411.14192" aria-labelledby="html-2411.14192" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14192" title="Other formats" id="oth-2411.14192" aria-labelledby="oth-2411.14192">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning Pore-scale Multi-phase Flow from Experimental Data with Graph Neural Network </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Gu,+Y">Yuxuan Gu</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Spurin,+C">Catherine Spurin</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Wen,+G">Gege Wen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted for Machine Learning and the Physical Sciences Workshop at the 38th conference on Neural Information Processing Systems (NeurIPS 2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Fluid Dynamics (physics.flu-dyn)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Understanding the process of multiphase fluid flow through porous media is crucial for many climate change mitigation technologies, including CO$_2$ geological storage, hydrogen storage, and fuel cells. However, current numerical models are often incapable of accurately capturing the complex pore-scale physics observed in experiments. In this study, we address this challenge using a graph neural network-based approach and directly learn pore-scale fluid flow using micro-CT experimental data. 
We propose a Long-Short-Edge MeshGraphNet (LSE-MGN) that predicts the state of each node in the pore space at each time step. During inference, given an initial state, the model can autoregressively predict the evolution of the multiphase flow process over time. This approach successfully captures the physics from the high-resolution experimental data while maintaining computational efficiency, providing a promising direction for accurate and efficient pore-scale modeling of complex multiphase fluid flow dynamics. </p> </div> </dd> <dt> <a name='item406'>[406]</a> <a href ="/abs/2411.14196" title="Abstract" id="2411.14196"> arXiv:2411.14196 </a> (cross-list from physics.bio-ph) [<a href="/pdf/2411.14196" title="Download PDF" id="pdf-2411.14196" aria-labelledby="pdf-2411.14196">pdf</a>, <a href="https://arxiv.org/html/2411.14196v1" title="View HTML" id="html-2411.14196" aria-labelledby="html-2411.14196" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14196" title="Other formats" id="oth-2411.14196" aria-labelledby="oth-2411.14196">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Uncertainty Quantification in Working Memory via Moment Neural Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Ma,+H">Hengyuan Ma</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Lu,+W">Wenlian Lu</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Feng,+J">Jianfeng Feng</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Code released: <a href="https://github.com/AwakerMhy/mnn_wm_uq" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Biological Physics (physics.bio-ph)</span>; Neural and Evolutionary Computing (cs.NE); Applications 
(stat.AP) </div> <p class='mathjax'> Humans possess a finely tuned sense of uncertainty that helps anticipate potential errors, vital for adaptive behavior and survival. However, the underlying neural mechanisms remain unclear. This study applies moment neural networks (MNNs) to explore the neural mechanism of uncertainty quantification in working memory (WM). The MNN captures nonlinear coupling of the first two moments in spiking neural networks (SNNs), identifying firing covariance as a key indicator of uncertainty in encoded information. Trained on a WM task, the model demonstrates coding precision and uncertainty quantification comparable to human performance. Analysis reveals a link between the probabilistic and sampling-based coding for uncertainty representation. Transferring the MNN's weights to an SNN replicates these results. Furthermore, the study provides testable predictions demonstrating how noise and heterogeneity enhance WM performance, highlighting their beneficial role rather than being mere biological byproducts. These findings offer insights into how the brain effectively manages uncertainty with exceptional accuracy. 
</p> </div> </dd> <dt> <a name='item407'>[407]</a> <a href ="/abs/2411.14230" title="Abstract" id="2411.14230"> arXiv:2411.14230 </a> (cross-list from econ.GN) [<a href="/pdf/2411.14230" title="Download PDF" id="pdf-2411.14230" aria-labelledby="pdf-2411.14230">pdf</a>, <a href="/format/2411.14230" title="Other formats" id="oth-2411.14230" aria-labelledby="oth-2411.14230">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Public sentiments on the fourth industrial revolution: An unsolicited public opinion poll from Twitter </div> <div class='list-authors'><a href="https://arxiv.org/search/econ?searchtype=author&query=Abbonato,+D">Diletta Abbonato</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 40 pages, 6 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">General Economics (econ.GN)</span>; Computers and Society (cs.CY); Social and Information Networks (cs.SI) </div> <p class='mathjax'> This article explores public perceptions on the Fourth Industrial Revolution (4IR) through an analysis of social media discourse across six European countries. Using sentiment analysis and machine learning techniques on a dataset of tweets and media articles, we assess how the public reacts to the integration of technologies such as artificial intelligence, robotics, and blockchain into society. The results highlight a significant polarization of opinions, with a shift from neutral to more definitive stances either embracing or resisting technological impacts. Positive sentiments are often associated with technological enhancements in quality of life and economic opportunities, whereas concerns focus on issues of privacy, data security, and ethical implications. This polarization underscores the need for policymakers to engage proactively with the public to address fears and harness the benefits of 4IR technologies. 
The findings also advocate for digital literacy and public awareness programs to mitigate misinformation and foster an informed public discourse on future technological integration. This study contributes to the ongoing debate on aligning technological advances with societal values and needs, emphasizing the role of informed public opinion in shaping effective policy. </p> </div> </dd> <dt> <a name='item408'>[408]</a> <a href ="/abs/2411.14238" title="Abstract" id="2411.14238"> arXiv:2411.14238 </a> (cross-list from math.CO) [<a href="/pdf/2411.14238" title="Download PDF" id="pdf-2411.14238" aria-labelledby="pdf-2411.14238">pdf</a>, <a href="https://arxiv.org/html/2411.14238v1" title="View HTML" id="html-2411.14238" aria-labelledby="html-2411.14238" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14238" title="Other formats" id="oth-2411.14238" aria-labelledby="oth-2411.14238">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Computing the permanental polynomial of $4k$-intercyclic bipartite graphs </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Bapat,+R+B">Ravindra B. Bapat</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Singh,+R">Ranveer Singh</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Wankhede,+H">Hitesh Wankhede</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> American Journal of Combinatorics, 3:35-43, (2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Combinatorics (math.CO)</span>; Discrete Mathematics (cs.DM) </div> <p class='mathjax'> Let $G$ be a bipartite graph with adjacency matrix $A(G)$. 
The characteristic polynomial $\phi(G,x)=\det(xI-A(G))$ and the permanental polynomial $\pi(G,x) = \text{per}(xI-A(G))$ are both graph invariants used to distinguish graphs. For bipartite graphs, we define the modified characteristic polynomial, which is obtained by changing the signs of some of the coefficients of $\phi(G,x)$. For $4k$-intercyclic bipartite graphs, i.e., those for which the removal of any $4k$-cycle results in a $C_{4k}$-free graph, we provide an expression for $\pi(G,x)$ in terms of the modified characteristic polynomial of the graph and its subgraphs. Our approach is purely combinatorial in contrast to the Pfaffian orientation method found in the literature to compute the permanental polynomial. </p> </div> </dd> <dt> <a name='item409'>[409]</a> <a href ="/abs/2411.14250" title="Abstract" id="2411.14250"> arXiv:2411.14250 </a> (cross-list from eess.IV) [<a href="/pdf/2411.14250" title="Download PDF" id="pdf-2411.14250" aria-labelledby="pdf-2411.14250">pdf</a>, <a href="https://arxiv.org/html/2411.14250v1" title="View HTML" id="html-2411.14250" aria-labelledby="html-2411.14250" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14250" title="Other formats" id="oth-2411.14250" aria-labelledby="oth-2411.14250">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CP-UNet: Contour-based Probabilistic Model for Medical Ultrasound Images Segmentation </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Yu,+R">Ruiguo Yu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+Y">Yiyang Zhang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tian,+Y">Yuan Tian</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liu,+Z">Zhiqiang Liu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+X">Xuewei Li</a>, <a 
href="https://arxiv.org/search/eess?searchtype=author&query=Gao,+J">Jie Gao</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 4 pages, 4 figures, 2 tables;For icassp2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Deep learning-based segmentation methods are widely utilized for detecting lesions in ultrasound images. Throughout the imaging procedure, the attenuation and scattering of ultrasound waves cause contour blurring and the formation of artifacts, limiting the clarity of the acquired ultrasound images. To overcome this challenge, we propose a contour-based probabilistic segmentation model CP-UNet, which guides the segmentation network to enhance its focus on contour during decoding. We design a novel down-sampling module to enable the contour probability distribution modeling and encoding stages to acquire global-local features. Furthermore, the Gaussian Mixture Model utilizes optimized features to model the contour distribution, capturing the uncertainty of lesion boundaries. Extensive experiments with several state-of-the-art deep learning segmentation methods on three ultrasound image datasets show that our method performs better on breast and thyroid lesions segmentation. 
</p> </div> </dd> <dt> <a name='item410'>[410]</a> <a href ="/abs/2411.14269" title="Abstract" id="2411.14269"> arXiv:2411.14269 </a> (cross-list from eess.IV) [<a href="/pdf/2411.14269" title="Download PDF" id="pdf-2411.14269" aria-labelledby="pdf-2411.14269">pdf</a>, <a href="https://arxiv.org/html/2411.14269v1" title="View HTML" id="html-2411.14269" aria-labelledby="html-2411.14269" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14269" title="Other formats" id="oth-2411.14269" aria-labelledby="oth-2411.14269">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Guided MRI Reconstruction via Schr\"odinger Bridge </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+Y">Yue Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhou,+T">Tian Zhou</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Cui,+Z">Zhuo-xu Cui</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Huang,+B">Bingsheng Huang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zheng,+H">Hairong Zheng</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liang,+D">Dong Liang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhu,+Y">Yanjie Zhu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV); Signal Processing (eess.SP) </div> <p class='mathjax'> Magnetic Resonance Imaging (MRI) is a multi-contrast imaging technique in which different contrast images share similar structural information. However, conventional diffusion models struggle to effectively leverage this structural similarity. 
Recently, the Schrödinger Bridge (SB), a nonlinear extension of the diffusion model, has been proposed to establish diffusion paths between any distributions, allowing the incorporation of guided priors. This study proposes an SB-based, multi-contrast image-guided reconstruction framework that establishes a diffusion bridge between the guiding and target image distributions. By using the guiding image along with data consistency during sampling, the target image is reconstructed more accurately. To better address structural differences between images, we introduce an inversion strategy from the field of image editing, termed $\mathbf{I}^2$SB-inversion. Experiments on paired T1 and T2-FLAIR datasets demonstrate that $\mathbf{I}^2$SB-inversion achieves a high acceleration up to 14.4 and outperforms existing methods in terms of both reconstruction accuracy and stability. </p> </div> </dd> <dt> <a name='item411'>[411]</a> <a href ="/abs/2411.14292" title="Abstract" id="2411.14292"> arXiv:2411.14292 </a> (cross-list from quant-ph) [<a href="/pdf/2411.14292" title="Download PDF" id="pdf-2411.14292" aria-labelledby="pdf-2411.14292">pdf</a>, <a href="https://arxiv.org/html/2411.14292v1" title="View HTML" id="html-2411.14292" aria-labelledby="html-2411.14292" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14292" title="Other formats" id="oth-2411.14292" aria-labelledby="oth-2411.14292">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hypothesis testing of symmetry in quantum dynamics </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Chen,+Y">Yu-Ao Chen</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Zhu,+C">Chenghong Zhu</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=He,+K">Keming He</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Liu,+Y">Yingjian Liu</a>, <a 
href="https://arxiv.org/search/quant-ph?searchtype=author&query=Wang,+X">Xin Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 14 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Information Theory (cs.IT) </div> <p class='mathjax'> Symmetry plays a crucial role in quantum physics, dictating the behavior and dynamics of physical systems. In this paper, we develop a hypothesis-testing framework for quantum dynamics symmetry using a limited number of queries to the unknown unitary operation and establish the quantum max-relative entropy lower bound for the type-II error. We construct optimal ancilla-free protocols that achieve optimal type-II error probability for testing time-reversal symmetry (T-symmetry) and diagonal symmetry (Z-symmetry) with limited queries. Contrasting with the advantages of indefinite causal order strategies in various quantum information processing tasks, we show that parallel, adaptive, and indefinite causal order strategies have equal power for our tasks. We establish optimal protocols for T-symmetry testing and Z-symmetry testing for 6 and 5 queries, respectively, from which we infer that the type-II error exhibits a decay rate of $\mathcal{O}(m^{-2})$ with respect to the number of queries $m$. This represents a significant improvement over the basic repetition protocols without using global entanglement, where the error decays at a slower rate of $\mathcal{O}(m^{-1})$. 
</p> </div> </dd> <dt> <a name='item412'>[412]</a> <a href ="/abs/2411.14317" title="Abstract" id="2411.14317"> arXiv:2411.14317 </a> (cross-list from cond-mat.stat-mech) [<a href="/pdf/2411.14317" title="Download PDF" id="pdf-2411.14317" aria-labelledby="pdf-2411.14317">pdf</a>, <a href="https://arxiv.org/html/2411.14317v1" title="View HTML" id="html-2411.14317" aria-labelledby="html-2411.14317" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14317" title="Other formats" id="oth-2411.14317" aria-labelledby="oth-2411.14317">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Model-free learning of probability flows: Elucidating the nonequilibrium dynamics of flocking </div> <div class='list-authors'><a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Boffi,+N+M">Nicholas M. Boffi</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Vanden-Eijnden,+E">Eric Vanden-Eijnden</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Statistical Mechanics (cond-mat.stat-mech)</span>; Machine Learning (cs.LG); Probability (math.PR) </div> <p class='mathjax'> Active systems comprise a class of nonequilibrium dynamics in which individual components autonomously dissipate energy. Efforts towards understanding the role played by activity have centered on computation of the entropy production rate (EPR), which quantifies the breakdown of time reversal symmetry. A fundamental difficulty in this program is that high dimensionality of the phase space renders traditional computational techniques infeasible for estimating the EPR. Here, we overcome this challenge with a novel deep learning approach that estimates probability currents directly from stochastic system trajectories. 
We derive a new physical connection between the probability current and two local definitions of the EPR for inertial systems, which we apply to characterize the departure from equilibrium in a canonical model of flocking. Our results highlight that entropy is produced and consumed on the spatial interface of a flock as the interplay between alignment and fluctuation dynamically creates and annihilates order. By enabling the direct visualization of when and where a given system is out of equilibrium, we anticipate that our methodology will advance the understanding of a broad class of complex nonequilibrium dynamics. </p> </div> </dd> <dt> <a name='item413'>[413]</a> <a href ="/abs/2411.14336" title="Abstract" id="2411.14336"> arXiv:2411.14336 </a> (cross-list from math.PR) [<a href="/pdf/2411.14336" title="Download PDF" id="pdf-2411.14336" aria-labelledby="pdf-2411.14336">pdf</a>, <a href="https://arxiv.org/html/2411.14336v1" title="View HTML" id="html-2411.14336" aria-labelledby="html-2411.14336" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14336" title="Other formats" id="oth-2411.14336" aria-labelledby="oth-2411.14336">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Finding the root in random nearest neighbor trees </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Brandenberger,+A">Anna Brandenberger</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Marcussen,+C">Cassandra Marcussen</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Mossel,+E">Elchanan Mossel</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Sudan,+M">Madhu Sudan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 22 pages, 7 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Probability (math.PR)</span>; Data 
Structures and Algorithms (cs.DS); Social and Information Networks (cs.SI) </div> <p class='mathjax'> We study the inference of network archaeology in growing random geometric graphs. We consider the root finding problem for a random nearest neighbor tree in dimension $d \in \mathbb{N}$, generated by sequentially embedding vertices uniformly at random in the $d$-dimensional torus and connecting each new vertex to the nearest existing vertex. More precisely, given an error parameter $\varepsilon > 0$ and the unlabeled tree, we want to efficiently find a small set of candidate vertices, such that the root is included in this set with probability at least $1 - \varepsilon$. We call such a candidate set a $\textit{confidence set}$. We define several variations of the root finding problem in geometric settings -- embedded, metric, and graph root finding -- which differ based on the nature of the type of metric information provided in addition to the graph structure (torus embedding, edge lengths, or no additional information, respectively). <br>We show that there exist efficient root finding algorithms for embedded and metric root finding. For embedded root finding, we derive upper and lower bounds (uniformly bounded in $n$) on the size of the confidence set: the upper bound is subpolynomial in $1/\varepsilon$ and stems from an explicit efficient algorithm, and the information-theoretic lower bound is polylogarithmic in $1/\varepsilon$. In particular, in $d=1$, we obtain matching upper and lower bounds for a confidence set of size $\Theta\left(\frac{\log(1/\varepsilon)}{\log \log(1/\varepsilon)} \right)$. 
</p> </div> </dd> <dt> <a name='item414'>[414]</a> <a href ="/abs/2411.14341" title="Abstract" id="2411.14341"> arXiv:2411.14341 </a> (cross-list from stat.ML) [<a href="/pdf/2411.14341" title="Download PDF" id="pdf-2411.14341" aria-labelledby="pdf-2411.14341">pdf</a>, <a href="/format/2411.14341" title="Other formats" id="oth-2411.14341" aria-labelledby="oth-2411.14341">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Logarithmic Neyman Regret for Adaptive Estimation of the Average Treatment Effect </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Neopane,+O">Ojash Neopane</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Ramdas,+A">Aaditya Ramdas</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Singh,+A">Aarti Singh</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 12 pages, 2 figures. Submitted to AISTATS 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Estimation of the Average Treatment Effect (ATE) is a core problem in causal inference with strong connections to Off-Policy Evaluation in Reinforcement Learning. This paper considers the problem of adaptively selecting the treatment allocation probability in order to improve estimation of the ATE. The majority of prior work on adaptive ATE estimation focuses on asymptotic guarantees, and in turn overlooks important practical considerations such as the difficulty of learning the optimal treatment allocation as well as hyper-parameter selection. Existing non-asymptotic methods are limited by poor empirical performance and exponential scaling of the Neyman regret with respect to problem parameters. 
In order to address these gaps, we propose and analyze the Clipped Second Moment Tracking (ClipSMT) algorithm, a variant of an existing algorithm with strong asymptotic optimality guarantees, and provide finite sample bounds on its Neyman regret. Our analysis shows that ClipSMT achieves exponential improvements in Neyman regret on two fronts: improving the dependence on $T$ from $O(\sqrt{T})$ to $O(\log T)$, as well as reducing the exponential dependence on problem parameters to a polynomial dependence. Finally, we conclude with simulations which show the marked improvement of ClipSMT over existing approaches. </p> </div> </dd> <dt> <a name='item415'>[415]</a> <a href ="/abs/2411.14351" title="Abstract" id="2411.14351"> arXiv:2411.14351 </a> (cross-list from stat.ML) [<a href="/pdf/2411.14351" title="Download PDF" id="pdf-2411.14351" aria-labelledby="pdf-2411.14351">pdf</a>, <a href="https://arxiv.org/html/2411.14351v1" title="View HTML" id="html-2411.14351" aria-labelledby="html-2411.14351" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14351" title="Other formats" id="oth-2411.14351" aria-labelledby="oth-2411.14351">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Indiscriminate Disruption of Conditional Inference on Multivariate Gaussians </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Caballero,+W+N">William N. 
Caballero</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=LaRosa,+M">Matthew LaRosa</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Fisher,+A">Alexander Fisher</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Tarokh,+V">Vahid Tarokh</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 30 pages, 6 figures; 4 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Cryptography and Security (cs.CR); Machine Learning (cs.LG); Applications (stat.AP) </div> <p class='mathjax'> The multivariate Gaussian distribution underpins myriad operations-research, decision-analytic, and machine-learning models (e.g., Bayesian optimization, Gaussian influence diagrams, and variational autoencoders). However, despite recent advances in adversarial machine learning (AML), inference for Gaussian models in the presence of an adversary is notably understudied. Therefore, we consider a self-interested attacker who wishes to disrupt a decisionmaker's conditional inference and subsequent actions by corrupting a set of evidentiary variables. To avoid detection, the attacker also desires the attack to appear plausible wherein plausibility is determined by the density of the corrupted evidence. We consider white- and grey-box settings such that the attacker has complete and incomplete knowledge about the decisionmaker's underlying multivariate Gaussian distribution, respectively. Select instances are shown to reduce to quadratic and stochastic quadratic programs, and structural properties are derived to inform solution methods. We assess the impact and efficacy of these attacks in three examples, including real estate evaluation, interest rate estimation, and signal processing. Each example leverages an alternative underlying model, thereby highlighting the attacks' broad applicability. 
Through these applications, we also juxtapose the behavior of the white- and grey-box attacks to understand how uncertainty and structure affect attacker behavior. </p> </div> </dd> <dt> <a name='item416'>[416]</a> <a href ="/abs/2411.14353" title="Abstract" id="2411.14353"> arXiv:2411.14353 </a> (cross-list from eess.IV) [<a href="/pdf/2411.14353" title="Download PDF" id="pdf-2411.14353" aria-labelledby="pdf-2411.14353">pdf</a>, <a href="/format/2411.14353" title="Other formats" id="oth-2411.14353" aria-labelledby="oth-2411.14353">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Enhancing Medical Image Segmentation with Deep Learning and Diffusion Models </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Liu,+H">Houze Liu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhou,+T">Tong Zhou</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Xiang,+Y">Yanlin Xiang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Shen,+A">Aoran Shen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Hu,+J">Jiacheng Hu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Du,+J">Junliang Du</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Medical image segmentation is crucial for accurate clinical diagnoses, yet it faces challenges such as low contrast between lesions and normal tissues, unclear boundaries, and high variability across patients. Deep learning has improved segmentation accuracy and efficiency, but it still relies heavily on expert annotations and struggles with the complexities of medical images. 
The small size of medical image datasets and the high cost of data acquisition further limit the performance of segmentation networks. Diffusion models, with their iterative denoising process, offer a promising alternative for better detail capture in segmentation. However, they face difficulties in accurately segmenting small targets and maintaining the precision of boundary details. This article discusses the importance of medical image segmentation, the limitations of current deep learning approaches, and the potential of diffusion models to address these challenges. </p> </div> </dd> <dt> <a name='item417'>[417]</a> <a href ="/abs/2411.14378" title="Abstract" id="2411.14378"> arXiv:2411.14378 </a> (cross-list from physics.flu-dyn) [<a href="/pdf/2411.14378" title="Download PDF" id="pdf-2411.14378" aria-labelledby="pdf-2411.14378">pdf</a>, <a href="/format/2411.14378" title="Other formats" id="oth-2411.14378" aria-labelledby="oth-2411.14378">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CoNFiLD-inlet: Synthetic Turbulence Inflow Using Generative Latent Diffusion Models with Neural Fields </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Liu,+X">Xin-Yang Liu</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Parikh,+M+H">Meet Hemant Parikh</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Fan,+X">Xiantao Fan</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Du,+P">Pan Du</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Wang,+Q">Qing Wang</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Chen,+Y">Yi-Fan Chen</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Wang,+J">Jian-Xun Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 27 pages, 10 figures </div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Fluid Dynamics (physics.flu-dyn)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Eddy-resolving turbulence simulations require stochastic inflow conditions that accurately replicate the complex, multi-scale structures of turbulence. Traditional recycling-based methods rely on computationally expensive precursor simulations, while existing synthetic inflow generators often fail to reproduce realistic coherent structures of turbulence. Recent advances in deep learning (DL) have opened new possibilities for inflow turbulence generation, yet many DL-based methods rely on deterministic, autoregressive frameworks prone to error accumulation, resulting in poor robustness for long-term predictions. In this work, we present CoNFiLD-inlet, a novel DL-based inflow turbulence generator that integrates diffusion models with a conditional neural field (CNF)-encoded latent space to produce realistic, stochastic inflow turbulence. By parameterizing inflow conditions using Reynolds numbers, CoNFiLD-inlet generalizes effectively across a wide range of Reynolds numbers ($Re_\tau$ between $10^3$ and $10^4$) without requiring retraining or parameter tuning. Comprehensive validation through a priori and a posteriori tests in Direct Numerical Simulation (DNS) and Wall-Modeled Large Eddy Simulation (WMLES) demonstrates its high fidelity, robustness, and scalability, positioning it as an efficient and versatile solution for inflow turbulence synthesis. 
</p> </div> </dd> <dt> <a name='item418'>[418]</a> <a href ="/abs/2411.14385" title="Abstract" id="2411.14385"> arXiv:2411.14385 </a> (cross-list from eess.IV) [<a href="/pdf/2411.14385" title="Download PDF" id="pdf-2411.14385" aria-labelledby="pdf-2411.14385">pdf</a>, <a href="https://arxiv.org/html/2411.14385v1" title="View HTML" id="html-2411.14385" aria-labelledby="html-2411.14385" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14385" title="Other formats" id="oth-2411.14385" aria-labelledby="oth-2411.14385">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Enhancing Diagnostic Precision in Gastric Bleeding through Automated Lesion Segmentation: A Deep DuS-KFCM Approach </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Liu,+X">Xian-Xian Liu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Xu,+M">Mingkun Xu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wei,+Y">Yuanyuan Wei</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Qin,+H">Huafeng Qin</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Song,+Q">Qun Song</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Fong,+S">Simon Fong</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tien,+F">Feng Tien</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Luo,+W">Wei Luo</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Gao,+J">Juntao Gao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+Z">Zhihua Zhang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Siu,+S">Shirley Siu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Timely 
and precise classification and segmentation of gastric bleeding in endoscopic imagery are pivotal for the rapid diagnosis and intervention of gastric complications, which is critical in life-saving medical procedures. Traditional methods grapple with the challenge posed by the indistinguishable intensity values of bleeding tissues adjacent to other gastric structures. Our study seeks to revolutionize this domain by introducing a novel deep learning model, the Dual Spatial Kernelized Constrained Fuzzy C-Means (Deep DuS-KFCM) clustering algorithm. This Hybrid Neuro-Fuzzy system synergizes Neural Networks with Fuzzy Logic to offer a highly precise and efficient identification of bleeding regions. Implementing a two-fold coarse-to-fine strategy for segmentation, this model initially employs the Spatial Kernelized Fuzzy C-Means (SKFCM) algorithm enhanced with spatial intensity profiles and subsequently harnesses the state-of-the-art DeepLabv3+ with ResNet50 architecture to refine the segmentation output. Through extensive experiments across mainstream gastric bleeding and red spots datasets, our Deep DuS-KFCM model demonstrated unprecedented accuracy rates of 87.95%, coupled with a specificity of 96.33%, outperforming contemporary segmentation methods. The findings underscore the model's robustness against noise and its outstanding segmentation capabilities, particularly for identifying subtle bleeding symptoms, thereby presenting a significant leap forward in medical image processing. 
</p> </div> </dd> <dt> <a name='item419'>[419]</a> <a href ="/abs/2411.14390" title="Abstract" id="2411.14390"> arXiv:2411.14390 </a> (cross-list from cond-mat.dis-nn) [<a href="/pdf/2411.14390" title="Download PDF" id="pdf-2411.14390" aria-labelledby="pdf-2411.14390">pdf</a>, <a href="https://arxiv.org/html/2411.14390v1" title="View HTML" id="html-2411.14390" aria-labelledby="html-2411.14390" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14390" title="Other formats" id="oth-2411.14390" aria-labelledby="oth-2411.14390">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Persistent Homology for Structural Characterization in Disordered Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Wang,+A">An Wang</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Zou,+L">Li Zou</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 19 pages, 17 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Disordered Systems and Neural Networks (cond-mat.dis-nn)</span>; Materials Science (cond-mat.mtrl-sci); Machine Learning (cs.LG); Mathematical Physics (math-ph) </div> <p class='mathjax'> We propose a unified framework based on persistent homology (PH) to characterize both local and global structures in disordered systems. It can simultaneously generate local and global descriptors using the same algorithm and data structure, and has been shown to be highly effective and interpretable in predicting particle rearrangements and classifying global phases. Based on this framework, we define a non-parametric metric, the Separation Index (SI), which not only outperforms traditional bond-orientational order parameters in phase classification tasks but also establishes a connection between particle environments and the global phase structure. 
Our methods provide an effective framework for understanding and analyzing the properties of disordered materials, with broad potential applications in materials science and even wider studies of complex systems. </p> </div> </dd> <dt> <a name='item420'>[420]</a> <a href ="/abs/2411.14412" title="Abstract" id="2411.14412"> arXiv:2411.14412 </a> (cross-list from quant-ph) [<a href="/pdf/2411.14412" title="Download PDF" id="pdf-2411.14412" aria-labelledby="pdf-2411.14412">pdf</a>, <a href="https://arxiv.org/html/2411.14412v1" title="View HTML" id="html-2411.14412" aria-labelledby="html-2411.14412" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14412" title="Other formats" id="oth-2411.14412" aria-labelledby="oth-2411.14412">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Adversarial Poisoning Attack on Quantum Machine Learning Models </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Kundu,+S">Satwik Kundu</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Ghosh,+S">Swaroop Ghosh</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Cryptography and Security (cs.CR); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> With the growing interest in Quantum Machine Learning (QML) and the increasing availability of quantum computers through cloud providers, addressing the potential security risks associated with QML has become an urgent priority. One key concern in the QML domain is the threat of data poisoning attacks in the current quantum cloud setting. Adversarial access to training data could severely compromise the integrity and availability of QML models. 
Classical data poisoning techniques require significant knowledge and training to generate poisoned data, and lack noise resilience, making them ineffective for QML models in the Noisy Intermediate Scale Quantum (NISQ) era. In this work, we first propose a simple yet effective technique to measure intra-class encoder state similarity (ESS) by analyzing the outputs of encoding circuits. Leveraging this approach, we introduce a quantum indiscriminate data poisoning attack, QUID. Through extensive experiments conducted in both noiseless and noisy environments (e.g., IBM_Brisbane's noise), across various architectures and datasets, QUID achieves up to $92\%$ accuracy degradation in model performance compared to baseline models and up to $75\%$ accuracy degradation compared to random label-flipping. We also tested QUID against state-of-the-art classical defenses, with accuracy degradation still exceeding $50\%$, demonstrating its effectiveness. This work represents the first attempt to reevaluate data poisoning attacks in the context of QML. </p> </div> </dd> <dt> <a name='item421'>[421]</a> <a href ="/abs/2411.14416" title="Abstract" id="2411.14416"> arXiv:2411.14416 </a> (cross-list from quant-ph) [<a href="/pdf/2411.14416" title="Download PDF" id="pdf-2411.14416" aria-labelledby="pdf-2411.14416">pdf</a>, <a href="https://arxiv.org/html/2411.14416v1" title="View HTML" id="html-2411.14416" aria-labelledby="html-2411.14416" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14416" title="Other formats" id="oth-2411.14416" aria-labelledby="oth-2411.14416">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> QMA vs. 
QCMA and Pseudorandomness </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Liu,+J">Jiahui Liu</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Mutreja,+S">Saachi Mutreja</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Yuen,+H">Henry Yuen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Computational Complexity (cs.CC) </div> <p class='mathjax'> We study a longstanding question of Aaronson and Kuperberg on whether there exists a classical oracle separating $\mathsf{QMA}$ from $\mathsf{QCMA}$. Settling this question in either direction would yield insight into the power of quantum proofs over classical proofs. We show that such an oracle exists if a certain quantum pseudorandomness conjecture holds. Roughly speaking, the conjecture posits that quantum algorithms cannot, by making few queries, distinguish between the uniform distribution over permutations versus permutations drawn from so-called "dense" distributions. <br>Our result can be viewed as establishing a "win-win" scenario: <em>either</em> there is a classical oracle separation of $\mathsf{QMA}$ from $\mathsf{QCMA}$, <em>or</em> there is quantum advantage in distinguishing pseudorandom distributions on permutations. 
</p> </div> </dd> <dt> <a name='item422'>[422]</a> <a href ="/abs/2411.14418" title="Abstract" id="2411.14418"> arXiv:2411.14418 </a> (cross-list from eess.IV) [<a href="/pdf/2411.14418" title="Download PDF" id="pdf-2411.14418" aria-labelledby="pdf-2411.14418">pdf</a>, <a href="https://arxiv.org/html/2411.14418v1" title="View HTML" id="html-2411.14418" aria-labelledby="html-2411.14418" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.14418" title="Other formats" id="oth-2411.14418" aria-labelledby="oth-2411.14418">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multimodal 3D Brain Tumor Segmentation with Adversarial Training and Conditional Random Field </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Jiang,+L">Lan Jiang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zheng,+Y">Yuchao Zheng</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yu,+M">Miao Yu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+H">Haiqing Zhang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Aladwani,+F">Fatemah Aladwani</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Perelli,+A">Alessandro Perelli</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 7 figures, Annual Conference on Medical Image Understanding and Analysis (MIUA) 2024 </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Medical Image Understanding and Analysis (MIUA), Lecture Notes in Computer Science, Springer, vol. 
14859, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Accurate brain tumor segmentation remains a challenging task due to structural complexity and great individual differences of gliomas. Leveraging the pre-eminent detail resilience of CRF and spatial feature extraction capacity of V-net, we propose a multimodal 3D Volume Generative Adversarial Network (3D-vGAN) for precise segmentation. The model utilizes Pseudo-3D for V-net improvement, adds a conditional random field after the generator, and uses the original image as supplemental guidance. Results, using the BraTS-2018 dataset, show that 3D-vGAN outperforms classical segmentation models, including U-net, GAN, FCN and 3D V-net, reaching specificity over 99.8%. </p> </div> </dd> </dl> <dl id='articles'> <h3>Replacement submissions (showing 247 of 247 entries)</h3> <dt> <a name='item423'>[423]</a> <a href ="/abs/2003.03653" title="Abstract" id="2003.03653"> arXiv:2003.03653 </a> (replaced) [<a href="/pdf/2003.03653" title="Download PDF" id="pdf-2003.03653" aria-labelledby="pdf-2003.03653">pdf</a>, <a href="https://arxiv.org/html/2003.03653v4" title="View HTML" id="html-2003.03653" aria-labelledby="html-2003.03653" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2003.03653" title="Other formats" id="oth-2003.03653" aria-labelledby="oth-2003.03653">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SalsaNext: Fast, Uncertainty-aware Semantic Segmentation of LiDAR Point Clouds for Autonomous Driving </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cortinhal,+T">Tiago Cortinhal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tzelepis,+G">George Tzelepis</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Aksoy,+E+E">Eren Erdal Aksoy</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> In this paper, we introduce SalsaNext for the uncertainty-aware semantic segmentation of a full 3D LiDAR point cloud in real-time. SalsaNext is the next version of SalsaNet [1] which has an encoder-decoder architecture where the encoder unit has a set of ResNet blocks and the decoder part combines upsampled features from the residual blocks. In contrast to SalsaNet, we introduce a new context module, replace the ResNet encoder blocks with a new residual dilated convolution stack with gradually increasing receptive fields and add the pixel-shuffle layer in the decoder. Additionally, we switch from stride convolution to average pooling and also apply central dropout treatment. To directly optimize the Jaccard index, we further combine the weighted cross-entropy loss with Lovasz-Softmax loss [2]. We finally inject a Bayesian treatment to compute the epistemic and aleatoric uncertainties for each point in the cloud. We provide a thorough quantitative evaluation on the Semantic-KITTI dataset [3], which demonstrates that the proposed SalsaNext outperforms other state-of-the-art semantic segmentation networks and ranks first on the Semantic-KITTI leaderboard. We also release our source code <a href="https://github.com/TiagoCortinhal/SalsaNext" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item424'>[424]</a> <a href ="/abs/2105.04086" title="Abstract" id="2105.04086"> arXiv:2105.04086 </a> (replaced) [<a href="/pdf/2105.04086" title="Download PDF" id="pdf-2105.04086" aria-labelledby="pdf-2105.04086">pdf</a>, <a href="https://arxiv.org/html/2105.04086v2" title="View HTML" id="html-2105.04086" aria-labelledby="html-2105.04086" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2105.04086" title="Other formats" id="oth-2105.04086" aria-labelledby="oth-2105.04086">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Deep Reinforcement Learning-based Methods for Resource Scheduling in Cloud Computing: A Review and Future Directions </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+G">Guangyao Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tian,+W">Wenhong Tian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Buyya,+R">Rajkumar Buyya</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xue,+R">Ruini Xue</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+L">Liang Song</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 22 pages,14 figures </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Artif. Intell. Rev. 57 (5) (2024) 124 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Distributed, Parallel, and Cluster Computing (cs.DC)</span> </div> <p class='mathjax'> As the quantity and complexity of information processed by software systems increase, large-scale software systems have an increasing requirement for high-performance distributed computing systems. 
With the acceleration of the Internet in Web 2.0, Cloud computing as a paradigm to provide dynamic, uncertain and elastic services has shown superiorities to meet the computing needs dynamically. Without an appropriate scheduling approach, extensive Cloud computing may cause high energy consumption and high cost; in addition, high energy consumption will cause massive carbon dioxide emissions. Moreover, inappropriate scheduling will reduce the service life of physical devices as well as increase response time to users' requests. Hence, efficient scheduling of resources or optimal allocation of requests, which is usually an NP-hard problem, is one of the prominent issues in emerging trends of Cloud computing. Focusing on improving quality of service (QoS), reducing cost and abating contamination, researchers have conducted extensive work on resource scheduling problems of Cloud computing over the years. Nevertheless, the growing complexity of Cloud computing, which is a super-massive distributed system, is limiting the application of scheduling approaches. Machine learning, a utility method to tackle problems in complex scenes, is used to resolve the resource scheduling of Cloud computing as an innovative idea in recent years. Deep reinforcement learning (DRL), a combination of deep learning (DL) and reinforcement learning (RL), is one branch of machine learning and has a considerable prospect in resource scheduling of Cloud computing. This paper surveys the methods of resource scheduling with a focus on DRL-based scheduling approaches in Cloud computing, reviews the application of DRL, and discusses challenges and future directions of DRL in scheduling of Cloud computing. 
</p> </div> </dd> <dt> <a name='item425'>[425]</a> <a href ="/abs/2202.01694" title="Abstract" id="2202.01694"> arXiv:2202.01694 </a> (replaced) [<a href="/pdf/2202.01694" title="Download PDF" id="pdf-2202.01694" aria-labelledby="pdf-2202.01694">pdf</a>, <a href="https://arxiv.org/html/2202.01694v4" title="View HTML" id="html-2202.01694" aria-labelledby="html-2202.01694" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2202.01694" title="Other formats" id="oth-2202.01694" aria-labelledby="oth-2202.01694">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Variational Nearest Neighbor Gaussian Process </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+L">Luhuan Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pleiss,+G">Geoff Pleiss</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cunningham,+J">John Cunningham</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> Variational approximations to Gaussian processes (GPs) typically use a small set of inducing points to form a low-rank approximation to the covariance matrix. In this work, we instead exploit a sparse approximation of the precision matrix. We propose variational nearest neighbor Gaussian process (VNNGP), which introduces a prior that only retains correlations within $K$ nearest-neighboring observations, thereby inducing sparse precision structure. Using the variational framework, VNNGP's objective can be factorized over both observations and inducing points, enabling stochastic optimization with a time complexity of $O(K^3)$. Hence, we can arbitrarily scale the inducing point size, even to the point of putting inducing points at every observed location. 
We compare VNNGP to other scalable GPs through various experiments, and demonstrate that VNNGP (1) can dramatically outperform low-rank methods, and (2) is less prone to overfitting than other nearest neighbor methods. </p> </div> </dd> <dt> <a name='item426'>[426]</a> <a href ="/abs/2209.15179" title="Abstract" id="2209.15179"> arXiv:2209.15179 </a> (replaced) [<a href="/pdf/2209.15179" title="Download PDF" id="pdf-2209.15179" aria-labelledby="pdf-2209.15179">pdf</a>, <a href="https://arxiv.org/html/2209.15179v4" title="View HTML" id="html-2209.15179" aria-labelledby="html-2209.15179" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2209.15179" title="Other formats" id="oth-2209.15179" aria-labelledby="oth-2209.15179">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Physical Adversarial Attack meets Computer Vision: A Decade Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+H">Hui Wei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+H">Hao Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jia,+X">Xuemei Jia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zhixiang Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+H">Hanxun Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Z">Zhubo Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Satoh,+S">Shin'ichi Satoh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Van+Gool,+L">Luc Van Gool</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zheng Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published at IEEE TPAMI. 
GitHub:<a href="https://github.com/weihui1308/PAA" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Despite the impressive achievements of Deep Neural Networks (DNNs) in computer vision, their vulnerability to adversarial attacks remains a critical concern. Extensive research has demonstrated that incorporating sophisticated perturbations into input images can lead to a catastrophic degradation in DNNs' performance. This perplexing phenomenon not only exists in the digital space but also in the physical world. Consequently, it becomes imperative to evaluate the security of DNNs-based systems to ensure their safe deployment in real-world scenarios, particularly in security-sensitive applications. To facilitate a profound understanding of this topic, this paper presents a comprehensive overview of physical adversarial attacks. Firstly, we distill four general steps for launching physical adversarial attacks. Building upon this foundation, we uncover the pervasive role of artifacts carrying adversarial perturbations in the physical world. These artifacts influence each step. To denote them, we introduce a new term: adversarial medium. Then, we take the first step to systematically evaluate the performance of physical adversarial attacks, taking the adversarial medium as a first attempt. Our proposed evaluation metric, hiPAA, comprises six perspectives: Effectiveness, Stealthiness, Robustness, Practicability, Aesthetics, and Economics. We also provide comparative results across task categories, together with insightful observations and suggestions for future research directions. 
</p> </div> </dd> <dt> <a name='item427'>[427]</a> <a href ="/abs/2210.04359" title="Abstract" id="2210.04359"> arXiv:2210.04359 </a> (replaced) [<a href="/pdf/2210.04359" title="Download PDF" id="pdf-2210.04359" aria-labelledby="pdf-2210.04359">pdf</a>, <a href="https://arxiv.org/html/2210.04359v3" title="View HTML" id="html-2210.04359" aria-labelledby="html-2210.04359" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2210.04359" title="Other formats" id="oth-2210.04359" aria-labelledby="oth-2210.04359">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Fine-Grained Detection of Solidarity for Women and Migrants in 155 Years of German Parliamentary Debates </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kostikova,+A">Aida Kostikova</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Paassen,+B">Benjamin Paassen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Beese,+D">Dominik Beese</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=P%C3%BCtz,+O">Ole Pütz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wiedemann,+G">Gregor Wiedemann</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Eger,+S">Steffen Eger</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> EMNLP 2024 (Main Conference) Camera-Ready Version </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Machine Learning (cs.LG); Social and Information Networks (cs.SI) </div> <p class='mathjax'> Solidarity is a crucial concept to understand social relations in societies. In this paper, we explore fine-grained solidarity frames to study solidarity towards women and migrants in German parliamentary debates between 1867 and 2022. 
Using 2,864 manually annotated text snippets (with a cost exceeding 18k Euro), we evaluate large language models (LLMs) like Llama 3, GPT-3.5, and GPT-4. We find that GPT-4 outperforms other LLMs, approaching human annotation quality. Using GPT-4, we automatically annotate more than 18k further instances (with a cost of around 500 Euro) across 155 years and find that solidarity with migrants outweighs anti-solidarity but that frequencies and solidarity types shift over time. Most importantly, group-based notions of (anti-)solidarity fade in favor of compassionate solidarity, focusing on the vulnerability of migrant groups, and exchange-based anti-solidarity, focusing on the lack of (economic) contribution. Our study highlights the interplay of historical events, socio-economic needs, and political ideologies in shaping migration discourse and social cohesion. We also show that powerful LLMs, if carefully prompted, can be cost-effective alternatives to human annotation for hard social scientific tasks. 
</p> </div> </dd> <dt> <a name='item428'>[428]</a> <a href ="/abs/2212.09010" title="Abstract" id="2212.09010"> arXiv:2212.09010 </a> (replaced) [<a href="/pdf/2212.09010" title="Download PDF" id="pdf-2212.09010" aria-labelledby="pdf-2212.09010">pdf</a>, <a href="https://arxiv.org/html/2212.09010v5" title="View HTML" id="html-2212.09010" aria-labelledby="html-2212.09010" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2212.09010" title="Other formats" id="oth-2212.09010" aria-labelledby="oth-2212.09010">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Risk-Sensitive Reinforcement Learning with Exponential Criteria </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Noorani,+E">Erfaun Noorani</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mavridis,+C">Christos Mavridis</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Baras,+J">John Baras</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> While reinforcement learning has shown experimental success in a number of applications, it is known to be sensitive to noise and perturbations in the parameters of the system, leading to high variance in the total reward amongst different episodes in slightly different environments. To introduce robustness, as well as sample efficiency, risk-sensitive reinforcement learning methods are being thoroughly studied. In this work, we provide a definition of robust reinforcement learning policies and formulate a risk-sensitive reinforcement learning problem to approximate them, by solving an optimization problem with respect to a modified objective based on exponential criteria. 
In particular, we study a model-free risk-sensitive variation of the widely-used Monte Carlo Policy Gradient algorithm and introduce a novel risk-sensitive online Actor-Critic algorithm based on solving a multiplicative Bellman equation using stochastic approximation updates. Analytical results suggest that the use of exponential criteria generalizes commonly used ad-hoc regularization approaches, improves sample efficiency, and introduces robustness with respect to perturbations in the model parameters and the environment. The implementation, performance, and robustness properties of the proposed methods are evaluated in simulated experiments. </p> </div> </dd> <dt> <a name='item429'>[429]</a> <a href ="/abs/2301.10088" title="Abstract" id="2301.10088"> arXiv:2301.10088 </a> (replaced) [<a href="/pdf/2301.10088" title="Download PDF" id="pdf-2301.10088" aria-labelledby="pdf-2301.10088">pdf</a>, <a href="https://arxiv.org/html/2301.10088v4" title="View HTML" id="html-2301.10088" aria-labelledby="html-2301.10088" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2301.10088" title="Other formats" id="oth-2301.10088" aria-labelledby="oth-2301.10088">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Linear Arboreal Categories </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Abramsky,+S">Samson Abramsky</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Montacute,+Y">Yoàv Montacute</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shah,+N">Nihil Shah</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Logic in Computer Science (cs.LO)</span>; Category Theory (math.CT); Logic (math.LO) </div> <p class='mathjax'> Arboreal categories, introduced by Abramsky and Reggio, axiomatise categories with tree-shaped objects. 
These categories provide a categorical language for formalising behavioural notions such as simulation, bisimulation, and resource-indexing. In this paper, we strengthen the axioms of an arboreal category to exclude `branching' behaviour, obtaining a notion of `linear arboreal category'. We then demonstrate that every arboreal category satisfying a linearisability condition has an associated linear arboreal subcategory related via an adjunction. This identifies the relationship between the pebble-relation comonad, of Montacute and Shah, and the pebbling comonad, of Abramsky, Dawar, and Wang, and generalises it further. As another outcome of this new framework, we obtain a linear variant of the arboreal category for modal logic. By doing so we recover different linear-time equivalences between transition systems as instances of their categorical definitions. We conclude with new preservation and characterisation theorems relating trace inclusion and trace equivalence with different linear fragments of modal logic. 
</p> </div> </dd> <dt> <a name='item430'>[430]</a> <a href ="/abs/2303.05327" title="Abstract" id="2303.05327"> arXiv:2303.05327 </a> (replaced) [<a href="/pdf/2303.05327" title="Download PDF" id="pdf-2303.05327" aria-labelledby="pdf-2303.05327">pdf</a>, <a href="https://arxiv.org/html/2303.05327v2" title="View HTML" id="html-2303.05327" aria-labelledby="html-2303.05327" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2303.05327" title="Other formats" id="oth-2303.05327" aria-labelledby="oth-2303.05327">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Direct Access for Answers to Conjunctive Queries with Aggregation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Eldar,+I">Idan Eldar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Carmeli,+N">Nofar Carmeli</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kimelfeld,+B">Benny Kimelfeld</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span>; Data Structures and Algorithms (cs.DS) </div> <p class='mathjax'> We study the fine-grained complexity of conjunctive queries with grouping and aggregation. For common aggregate functions (e.g., min, max, count, sum), such a query can be phrased as an ordinary conjunctive query over a database annotated with a suitable commutative semiring. We investigate the ability to evaluate such queries by constructing in loglinear time a data structure that provides logarithmic-time direct access to the answers ordered by a given lexicographic order. This task is nontrivial since the number of answers might be larger than loglinear in the size of the input, so the data structure needs to provide a compact representation of the space of answers. 
In the absence of aggregation and annotation, past research established a sufficient tractability condition on queries and orders. For queries without self-joins, this condition is not just sufficient, but also necessary (under conventional lower-bound assumptions in fine-grained complexity). <br>We show that all past results continue to hold for annotated databases, assuming that the annotation itself does not participate in the lexicographic order. Yet, past algorithms do not apply to the count-distinct aggregation, which has no efficient representation as a commutative semiring; for this aggregation, we establish the corresponding tractability condition. We then show how the complexity of the problem changes when we include the aggregate and annotation value in the order. We also study the impact of having all relations but one annotated by the multiplicative identity (one), as happens when we translate aggregate queries into semiring annotations, and having a semiring with an idempotent addition, such as the case of min, max, and count-distinct over a logarithmic-size domain. 
</p> </div> </dd> <dt> <a name='item431'>[431]</a> <a href ="/abs/2303.08032" title="Abstract" id="2303.08032"> arXiv:2303.08032 </a> (replaced) [<a href="/pdf/2303.08032" title="Download PDF" id="pdf-2303.08032" aria-labelledby="pdf-2303.08032">pdf</a>, <a href="https://arxiv.org/html/2303.08032v3" title="View HTML" id="html-2303.08032" aria-labelledby="html-2303.08032" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2303.08032" title="Other formats" id="oth-2303.08032" aria-labelledby="oth-2303.08032">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Verifying the Robustness of Automatic Credibility Assessment </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Przyby%C5%82a,+P">Piotr Przybyła</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shvets,+A">Alexander Shvets</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Saggion,+H">Horacio Saggion</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Published in Natural Language Processing, 2024, pp. 1-29 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Text classification methods have been widely investigated as a way to detect content of low credibility: fake news, social media bots, propaganda, etc. Quite accurate models (likely based on deep neural networks) help in moderating public electronic platforms and often cause content creators to face rejection of their submissions or removal of already published texts. Having the incentive to evade further detection, content creators try to come up with a slightly modified version of the text (known as an attack with an adversarial example) that exploits the weaknesses of classifiers and results in a different output. 
Here we systematically test the robustness of common text classifiers against available attacking techniques and discover that, indeed, meaning-preserving changes in input text can mislead the models. The approaches we test focus on finding vulnerable spans in text and replacing individual characters or words, taking into account the similarity between the original and replacement content. We also introduce BODEGA: a benchmark for testing both victim models and attack methods on four misinformation detection tasks in an evaluation framework designed to simulate real use-cases of content moderation. The attacked tasks include (1) fact checking and detection of (2) hyperpartisan news, (3) propaganda and (4) rumours. Our experimental results show that modern large language models are often more vulnerable to attacks than previous, smaller solutions, e.g. attacks on GEMMA being up to 27\% more successful than those on BERT. Finally, we manually analyse a subset of adversarial examples and check what kinds of modifications are used in successful attacks. 
</p> </div> </dd> <dt> <a name='item432'>[432]</a> <a href ="/abs/2304.03365" title="Abstract" id="2304.03365"> arXiv:2304.03365 </a> (replaced) [<a href="/pdf/2304.03365" title="Download PDF" id="pdf-2304.03365" aria-labelledby="pdf-2304.03365">pdf</a>, <a href="https://arxiv.org/html/2304.03365v3" title="View HTML" id="html-2304.03365" aria-labelledby="html-2304.03365" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2304.03365" title="Other formats" id="oth-2304.03365" aria-labelledby="oth-2304.03365">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Decision-Focused Model-based Reinforcement Learning for Reward Transfer </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sharma,+A">Abhishek Sharma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Parbhoo,+S">Sonali Parbhoo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gottesman,+O">Omer Gottesman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Doshi-Velez,+F">Finale Doshi-Velez</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Machine Learning for Healthcare (MLHC) 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Model-based reinforcement learning (MBRL) provides a way to learn a transition model of the environment, which can then be used to plan personalized policies for different patient cohorts and to understand the dynamics involved in the decision-making process. However, standard MBRL algorithms are either sensitive to changes in the reward function or achieve suboptimal performance on the task when the transition model is restricted. 
Motivated by the need to use simple and interpretable models in critical domains such as healthcare, we propose a novel robust decision-focused (RDF) algorithm that learns a transition model that achieves high returns while being robust to changes in the reward function. We demonstrate our RDF algorithm can be used with several model classes and planning algorithms. We also provide theoretical and empirical evidence, on a variety of simulators and real patient data, that RDF can learn simple yet effective models that can be used to plan personalized policies. </p> </div> </dd> <dt> <a name='item433'>[433]</a> <a href ="/abs/2304.12158" title="Abstract" id="2304.12158"> arXiv:2304.12158 </a> (replaced) [<a href="/pdf/2304.12158" title="Download PDF" id="pdf-2304.12158" aria-labelledby="pdf-2304.12158">pdf</a>, <a href="/format/2304.12158" title="Other formats" id="oth-2304.12158" aria-labelledby="oth-2304.12158">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On the Computability of Measures of Regular Sets of Infinite Trees </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Niwi%C5%84ski,+D">Damian Niwiński</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Parys,+P">Paweł Parys</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Skrzypczak,+M">Michał Skrzypczak</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Logic in Computer Science (cs.LO)</span> </div> <p class='mathjax'> The Rabin tree theorem yields an algorithm to solve the satisfiability problem for monadic second-order logic over infinite trees. Here we solve the probabilistic variant of this problem. Namely, we show how to compute the probability that a randomly chosen tree satisfies a given formula. We additionally show that this probability is an algebraic number. 
This closes a line of research where similar results were shown for formalisms weaker than the full monadic second-order logic. </p> </div> </dd> <dt> <a name='item434'>[434]</a> <a href ="/abs/2305.14336" title="Abstract" id="2305.14336"> arXiv:2305.14336 </a> (replaced) [<a href="/pdf/2305.14336" title="Download PDF" id="pdf-2305.14336" aria-labelledby="pdf-2305.14336">pdf</a>, <a href="https://arxiv.org/html/2305.14336v5" title="View HTML" id="html-2305.14336" aria-labelledby="html-2305.14336" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2305.14336" title="Other formats" id="oth-2305.14336" aria-labelledby="oth-2305.14336">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Schema-Driven Information Extraction from Heterogeneous Tables </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bai,+F">Fan Bai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kang,+J">Junmo Kang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stanovsky,+G">Gabriel Stanovsky</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Freitag,+D">Dayne Freitag</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dredze,+M">Mark Dredze</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ritter,+A">Alan Ritter</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to EMNLP 2024 Findings </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> In this paper, we explore the question of whether large language models can support cost-efficient information extraction from tables. We introduce schema-driven information extraction, a new task that transforms tabular data into structured records following a human-authored schema. 
To assess various LLMs' capabilities on this task, we present a benchmark comprised of tables from four diverse domains: machine learning papers, chemistry literature, material science journals, and webpages. We use this collection of annotated tables to evaluate the ability of open-source and API-based language models to extract information from tables covering diverse domains and data formats. Our experiments demonstrate that surprisingly competitive performance can be achieved without requiring task-specific pipelines or labels, achieving F1 scores ranging from 74.2 to 96.1, while maintaining cost efficiency. Moreover, through detailed ablation studies and analyses, we investigate the factors contributing to model success and validate the practicality of distilling compact models to reduce API reliance. </p> </div> </dd> <dt> <a name='item435'>[435]</a> <a href ="/abs/2305.15738" title="Abstract" id="2305.15738"> arXiv:2305.15738 </a> (replaced) [<a href="/pdf/2305.15738" title="Download PDF" id="pdf-2305.15738" aria-labelledby="pdf-2305.15738">pdf</a>, <a href="https://arxiv.org/html/2305.15738v3" title="View HTML" id="html-2305.15738" aria-labelledby="html-2305.15738" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2305.15738" title="Other formats" id="oth-2305.15738" aria-labelledby="oth-2305.15738">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Maximum Weight Independent Set in Graphs with no Long Claws in Quasi-Polynomial Time </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gartland,+P">Peter Gartland</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lokshtanov,+D">Daniel Lokshtanov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Masa%C5%99%C3%ADk,+T">Tomáš Masařík</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pilipczuk,+M">Marcin Pilipczuk</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Pilipczuk,+M">Michał Pilipczuk</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rz%C4%85%C5%BCewski,+P">Paweł Rzążewski</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 59 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Data Structures and Algorithms (cs.DS)</span> </div> <p class='mathjax'> We show that the \textsc{Maximum Weight Independent Set} problem (\textsc{MWIS}) can be solved in quasi-polynomial time on $H$-free graphs (graphs excluding a fixed graph $H$ as an induced subgraph) for every $H$ whose every connected component is a path or a subdivided claw (i.e., a tree with at most three leaves). This completes the dichotomy of the complexity of \textsc{MWIS} in $\mathcal{F}$-free graphs for any finite set $\mathcal{F}$ of graphs into NP-hard cases and cases solvable in quasi-polynomial time, and corroborates the conjecture that the cases not known to be NP-hard are actually polynomial-time solvable. <br>The key graph-theoretic ingredient in our result is as follows. Fix an integer $t \geq 1$. Let $S_{t,t,t}$ be the graph created from three paths on $t$ edges by identifying one endpoint of each path into a single vertex. We show that, given a graph $G$, one can in polynomial time find either an induced $S_{t,t,t}$ in $G$, or a balanced separator consisting of $\mathcal{O}(\log |V(G)|)$ vertex neighborhoods in $G$, or an extended strip decomposition of $G$ (a decomposition almost as useful for recursion for \textsc{MWIS} as a partition into connected components) with each particle of weight multiplicatively smaller than the weight of $G$. This is a strengthening of a result of Majewski et al.\ [ICALP~2022] which provided such an extended strip decomposition only after the deletion of $\mathcal{O}(\log |V(G)|)$ vertex neighborhoods. 
To reach the final result, we employ an involved branching strategy that relies on the structural lemma presented above. </p> </div> </dd> <dt> <a name='item436'>[436]</a> <a href ="/abs/2306.02243" title="Abstract" id="2306.02243"> arXiv:2306.02243 </a> (replaced) [<a href="/pdf/2306.02243" title="Download PDF" id="pdf-2306.02243" aria-labelledby="pdf-2306.02243">pdf</a>, <a href="https://arxiv.org/html/2306.02243v3" title="View HTML" id="html-2306.02243" aria-labelledby="html-2306.02243" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2306.02243" title="Other formats" id="oth-2306.02243" aria-labelledby="oth-2306.02243">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Retrieval-Enhanced Visual Prompt Learning for Few-shot Classification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Rong,+J">Jintao Rong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+H">Hao Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ou,+L">Linlin Ou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+T">Tianxiao Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+X">Xinyi Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yifan Liu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> The Contrastive Language-Image Pretraining (CLIP) model has been widely used in various downstream vision tasks. The few-shot learning paradigm has been widely adopted to augment its capacity for these tasks. However, current paradigms may struggle with fine-grained classification, such as satellite image recognition, due to widening domain gaps. 
To address this limitation, we propose retrieval-enhanced visual prompt learning (RePrompt), which introduces retrieval mechanisms to cache and reuse the knowledge of downstream tasks. RePrompt constructs a retrieval database from either training examples or external data if available, and uses a retrieval mechanism to enhance multiple stages of a simple prompt learning baseline, thus narrowing the domain gap. During inference, our enhanced model can reference similar samples brought by retrieval to make more accurate predictions. A detailed analysis reveals that retrieval helps to improve the distribution of late features, thus, improving generalization for downstream tasks. RePrompt attains state-of-the-art performance on a wide range of vision datasets, including 11 image datasets, 3 video datasets, 1 multi-view dataset, and 4 domain generalization benchmarks. </p> </div> </dd> <dt> <a name='item437'>[437]</a> <a href ="/abs/2307.11229" title="Abstract" id="2307.11229"> arXiv:2307.11229 </a> (replaced) [<a href="/pdf/2307.11229" title="Download PDF" id="pdf-2307.11229" aria-labelledby="pdf-2307.11229">pdf</a>, <a href="/format/2307.11229" title="Other formats" id="oth-2307.11229" aria-labelledby="oth-2307.11229">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Convergent Finite Element Scheme for the Q-Tensor Model of Liquid Crystals Subjected to an Electric Field </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Hirsch,+M">Max Hirsch</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Weber,+F">Franziska Weber</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span>; Analysis of PDEs (math.AP) </div> <p class='mathjax'> We study the Landau-de Gennes Q-tensor model of liquid crystals subjected to an electric field and develop a fully discrete 
numerical scheme for its solution. The scheme uses a convex splitting of the bulk potential, and we introduce a truncation operator for the Q-tensors to ensure well-posedness of the problem. We prove the stability and well-posedness of the scheme. Finally, making a restriction on the admissible parameters of the scheme, we show that up to a subsequence, solutions to the fully discrete scheme converge to weak solutions of the Q-tensor model as the time step and mesh are refined. We then present numerical results computed by the numerical scheme, among which we show that it is possible to simulate the Fréedericksz transition with this scheme. </p> </div> </dd> <dt> <a name='item438'>[438]</a> <a href ="/abs/2308.00090" title="Abstract" id="2308.00090"> arXiv:2308.00090 </a> (replaced) [<a href="/pdf/2308.00090" title="Download PDF" id="pdf-2308.00090" aria-labelledby="pdf-2308.00090">pdf</a>, <a href="https://arxiv.org/html/2308.00090v3" title="View HTML" id="html-2308.00090" aria-labelledby="html-2308.00090" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2308.00090" title="Other formats" id="oth-2308.00090" aria-labelledby="oth-2308.00090">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> VG-SSL: Benchmarking Self-supervised Representation Learning Approaches for Visual Geo-localization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xiao,+J">Jiuhong Xiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+G">Gao Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Loianno,+G">Giuseppe Loianno</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages (including appendix, references), 7 figures, 7 tables. 
Accepted for WACV 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Visual Geo-localization (VG) is a critical research area for identifying geo-locations from visual inputs, particularly in autonomous navigation for robotics and vehicles. Current VG methods often learn feature extractors from geo-labeled images to create dense, geographically relevant representations. Recent advances in Self-Supervised Learning (SSL) have demonstrated its capability to achieve performance on par with supervised techniques with unlabeled images. This study presents a novel VG-SSL framework, designed for versatile integration and benchmarking of diverse SSL methods for representation learning in VG, featuring a unique geo-related pair strategy, GeoPair. Through extensive performance analysis, we adapt SSL techniques to improve VG on datasets from hand-held and car-mounted cameras used in robotics and autonomous vehicles. Our results show that contrastive learning and information maximization methods yield superior geo-specific representation quality, matching or surpassing the performance of state-of-the-art VG techniques. To our knowledge, this is the first benchmarking study of SSL in VG, highlighting its potential in enhancing geo-specific visual representations for robotics and autonomous vehicles. The code is publicly available at <a href="https://github.com/arplaboratory/VG-SSL" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item439'>[439]</a> <a href ="/abs/2308.06405" title="Abstract" id="2308.06405"> arXiv:2308.06405 </a> (replaced) [<a href="/pdf/2308.06405" title="Download PDF" id="pdf-2308.06405" aria-labelledby="pdf-2308.06405">pdf</a>, <a href="/format/2308.06405" title="Other formats" id="oth-2308.06405" aria-labelledby="oth-2308.06405">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> White-box Membership Inference Attacks against Diffusion Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pang,+Y">Yan Pang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+T">Tianhao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kang,+X">Xuhui Kang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huai,+M">Mengdi Huai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yang Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span> </div> <p class='mathjax'> Diffusion models have begun to overshadow GANs and other generative models in industrial applications due to their superior image generation performance. The complex architecture of these models furnishes an extensive array of attack features. In light of this, we aim to design membership inference attacks (MIAs) catered to diffusion models. We first conduct an exhaustive analysis of existing MIAs on diffusion models, taking into account factors such as black-box/white-box models and the selection of attack features. We found that white-box attacks are highly applicable in real-world scenarios, and the most effective attacks presently are white-box. 
Departing from earlier research, which employs model loss as the attack feature for white-box MIAs, we employ model gradients in our attack, leveraging the fact that these gradients provide a more profound understanding of model responses to various samples. We subject these models to rigorous testing across a range of parameters, including training steps, sampling frequency, diffusion steps, and data variance. Across all experimental settings, our method consistently demonstrated near-flawless attack performance, with attack success rate approaching 100% and attack AUCROC near 1.0. We also evaluate our attack against common defense mechanisms, and observe our attacks continue to exhibit commendable performance. </p> </div> </dd> <dt> <a name='item440'>[440]</a> <a href ="/abs/2308.07266" title="Abstract" id="2308.07266"> arXiv:2308.07266 </a> (replaced) [<a href="/pdf/2308.07266" title="Download PDF" id="pdf-2308.07266" aria-labelledby="pdf-2308.07266">pdf</a>, <a href="https://arxiv.org/html/2308.07266v2" title="View HTML" id="html-2308.07266" aria-labelledby="html-2308.07266" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2308.07266" title="Other formats" id="oth-2308.07266" aria-labelledby="oth-2308.07266">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Full Duplex Joint Communications and Sensing for 6G: Opportunities and Challenges </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sheemar,+C+K">Chandan Kumar Sheemar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Solanki,+S">Sourabh Solanki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Alexandropoulos,+G+C">George C. 
Alexandropoulos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lagunas,+E">Eva Lagunas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Querol,+J">Jorge Querol</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chatzinotas,+S">Symeon Chatzinotas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ottersten,+B">Björn Ottersten</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> The paradigm of joint communications and sensing (JCAS) envisions a revolutionary integration of communication and radar functionalities within a unified hardware platform. This novel concept not only opens up unprecedented interoperability opportunities, but also exhibits unique design challenges. To this end, the success of JCAS is highly dependent on efficient full-duplex (FD) operation, which has the potential to enable simultaneous transmission and reception within the same frequency band. While JCAS research is lately expanding, there still exist relevant directions of investigation that hold tremendous potential to profoundly transform the sixth generation (6G), and beyond, cellular networks. This article presents new opportunities and challenges brought up by FD-enabled JCAS, taking into account the key technical peculiarities of FD systems. Unlike simplified JCAS scenarios, we delve into the most comprehensive configuration, encompassing uplink and downlink users, as well as monostatic and bistatic radars, all harmoniously coexisting to jointly push the boundaries of both communications and sensing. The performance improvements resulting from this advancement bring forth numerous new challenges, each meticulously examined and expounded upon. 
</p> </div> </dd> <dt> <a name='item441'>[441]</a> <a href ="/abs/2308.08812" title="Abstract" id="2308.08812"> arXiv:2308.08812 </a> (replaced) [<a href="/pdf/2308.08812" title="Download PDF" id="pdf-2308.08812" aria-labelledby="pdf-2308.08812">pdf</a>, <a href="https://arxiv.org/html/2308.08812v2" title="View HTML" id="html-2308.08812" aria-labelledby="html-2308.08812" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2308.08812" title="Other formats" id="oth-2308.08812" aria-labelledby="oth-2308.08812">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Fusion of Variational Distribution Priors and Saliency Map Replay for Continual 3D Reconstruction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Palit,+S">Sanchar Palit</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Biswas,+S">Sandika Biswas</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> at ICVGIP 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Single-image 3D reconstruction is a research challenge focused on predicting 3D object shapes from single-view images. This task requires significant data acquisition to predict both visible and occluded portions of the shape. Furthermore, learning-based methods face the difficulty of creating a comprehensive training dataset for all possible classes. To this end, we propose a continual learning-based 3D reconstruction method where our goal is to design a model using Variational Priors that can still reconstruct the previously seen classes reasonably even after training on new classes. 
Variational Priors represent abstract shapes and combat forgetting, whereas saliency maps preserve object attributes with less memory usage. This is vital due to resource constraints in storing extensive training data. Additionally, we introduce saliency map-based experience replay to capture global and distinct object features. Thorough experiments show competitive results compared to established methods, both quantitatively and qualitatively. </p> </div> </dd> <dt> <a name='item442'>[442]</a> <a href ="/abs/2309.07072" title="Abstract" id="2309.07072"> arXiv:2309.07072 </a> (replaced) [<a href="/pdf/2309.07072" title="Download PDF" id="pdf-2309.07072" aria-labelledby="pdf-2309.07072">pdf</a>, <a href="https://arxiv.org/html/2309.07072v2" title="View HTML" id="html-2309.07072" aria-labelledby="html-2309.07072" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2309.07072" title="Other formats" id="oth-2309.07072" aria-labelledby="oth-2309.07072">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The Boundaries of Verifiable Accuracy, Robustness, and Generalisation in Deep Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bastounis,+A">Alexander Bastounis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gorban,+A+N">Alexander N. Gorban</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hansen,+A+C">Anders C. Hansen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Higham,+D+J">Desmond J. Higham</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Prokhorov,+D">Danil Prokhorov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sutton,+O">Oliver Sutton</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tyukin,+I+Y">Ivan Y. 
Tyukin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+Q">Qinghua Zhou</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Revised version of the original submission </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> In this work, we assess the theoretical limitations of determining guaranteed stability and accuracy of neural networks in classification tasks. We consider classical distribution-agnostic framework and algorithms minimising empirical risks and potentially subjected to some weights regularisation. We show that there is a large family of tasks for which computing and verifying ideal stable and accurate neural networks in the above settings is extremely challenging, if at all possible, even when such ideal solutions exist within the given class of neural architectures. </p> </div> </dd> <dt> <a name='item443'>[443]</a> <a href ="/abs/2309.10011" title="Abstract" id="2309.10011"> arXiv:2309.10011 </a> (replaced) [<a href="/pdf/2309.10011" title="Download PDF" id="pdf-2309.10011" aria-labelledby="pdf-2309.10011">pdf</a>, <a href="https://arxiv.org/html/2309.10011v3" title="View HTML" id="html-2309.10011" aria-labelledby="html-2309.10011" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2309.10011" title="Other formats" id="oth-2309.10011" aria-labelledby="oth-2309.10011">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Universal Photorealistic Style Transfer: A Lightweight and Adaptive Approach </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+R">Rong Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+E">Enyu Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zhiyuan Liu</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Feng,+A">Andrew Feng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Easley,+S+J">Scott John Easley</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Image and Video Processing (eess.IV) </div> <p class='mathjax'> Photorealistic style transfer aims to apply stylization while preserving the realism and structure of input content. However, existing methods often encounter challenges such as color tone distortions, dependency on pair-wise pre-training, inefficiency with high-resolution inputs, and the need for additional constraints in video style transfer tasks. To address these issues, we propose a Universal Photorealistic Style Transfer (UPST) framework that delivers accurate photorealistic style transfer on high-resolution images and videos without relying on pre-training. Our approach incorporates a lightweight StyleNet for per-instance transfer, ensuring color tone accuracy while supporting high-resolution inputs, maintaining rapid processing speeds, and eliminating the need for pretraining. To further enhance photorealism and efficiency, we introduce instance-adaptive optimization, which features an adaptive coefficient to prioritize content image realism and employs early stopping to accelerate network convergence. Additionally, UPST enables seamless video style transfer without additional constraints due to its strong non-color information preservation ability. Experimental results show that UPST consistently produces photorealistic outputs and significantly reduces GPU memory usage, making it an effective and universal solution for various photorealistic style transfer tasks. 
</p> </div> </dd> <dt> <a name='item444'>[444]</a> <a href ="/abs/2309.16584" title="Abstract" id="2309.16584"> arXiv:2309.16584 </a> (replaced) [<a href="/pdf/2309.16584" title="Download PDF" id="pdf-2309.16584" aria-labelledby="pdf-2309.16584">pdf</a>, <a href="/format/2309.16584" title="Other formats" id="oth-2309.16584" aria-labelledby="oth-2309.16584">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Collaborative Distributed Machine Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+D">David Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kannengie%C3%9Fer,+N">Niclas Kannengießer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rank,+S">Sascha Rank</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sunyaev,+A">Ali Sunyaev</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Multiagent Systems (cs.MA)</span>; Emerging Technologies (cs.ET); Machine Learning (cs.LG); Software Engineering (cs.SE) </div> <p class='mathjax'> Various collaborative distributed machine learning (CDML) systems, including federated learning systems and swarm learning systems, with different key traits were developed to leverage resources for the development and use of machine learning (ML) models in a confidentiality-preserving way. To meet use case requirements, suitable CDML systems need to be selected. However, comparison between CDML systems to assess their suitability for use cases is often difficult. To support comparison of CDML systems and introduce scientific and practical audiences to the principal functioning and key traits of CDML systems, this work presents a CDML system conceptualization and CDML archetypes. 
</p> </div> </dd> <dt> <a name='item445'>[445]</a> <a href ="/abs/2310.00616" title="Abstract" id="2310.00616"> arXiv:2310.00616 </a> (replaced) [<a href="/pdf/2310.00616" title="Download PDF" id="pdf-2310.00616" aria-labelledby="pdf-2310.00616">pdf</a>, <a href="https://arxiv.org/html/2310.00616v2" title="View HTML" id="html-2310.00616" aria-labelledby="html-2310.00616" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2310.00616" title="Other formats" id="oth-2310.00616" aria-labelledby="oth-2310.00616">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Understanding Adversarial Transferability in Federated Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yijiang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+Y">Ying Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Haohan Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published in Transactions on Machine Learning Research (TMLR) (11/2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> We investigate a specific security risk in FL: a group of malicious clients has impacted the model during training by disguising their identities and acting as benign clients but later switching to an adversarial role. They use their data, which was part of the training set, to train a substitute model and conduct transferable adversarial attacks against the federated model. This type of attack is subtle and hard to detect because these clients initially appear to be benign. 
<br>The key question we address is: How robust is the FL system to such covert attacks, especially compared to traditional centralized learning systems? We empirically show that the proposed attack imposes a high security risk to current FL systems. By using only 3\% of the client's data, we achieve the highest attack rate of over 80\%. To further offer a full understanding of the challenges the FL system faces in transferable attacks, we provide a comprehensive analysis over the transfer robustness of FL across a spectrum of configurations. Surprisingly, FL systems show a higher level of robustness than their centralized counterparts, especially when both systems are equally good at handling regular, non-malicious data. <br>We attribute this increased robustness to two main factors: 1) Decentralized Data Training: Each client trains the model on its own data, reducing the overall impact of any single malicious client. 2) Model Update Averaging: The updates from each client are averaged together, further diluting any malicious alterations. Both practical experiments and theoretical analysis support our conclusions. This research not only sheds light on the resilience of FL systems against hidden attacks but also raises important considerations for their future application and development. 
</p> </div> </dd> <dt> <a name='item446'>[446]</a> <a href ="/abs/2310.04987" title="Abstract" id="2310.04987"> arXiv:2310.04987 </a> (replaced) [<a href="/pdf/2310.04987" title="Download PDF" id="pdf-2310.04987" aria-labelledby="pdf-2310.04987">pdf</a>, <a href="https://arxiv.org/html/2310.04987v3" title="View HTML" id="html-2310.04987" aria-labelledby="html-2310.04987" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2310.04987" title="Other formats" id="oth-2310.04987" aria-labelledby="oth-2310.04987">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Data-centric Graph Learning: A Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+Y">Yuxin Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bo,+D">Deyu Bo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+C">Cheng Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+Z">Zhiyuan Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Z">Zhongjian Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+J">Jixi Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peng,+Y">Yufei Peng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+C">Chuan Shi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 20 pages, accepted by IEEE Transactions on Big Data </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Social and Information Networks (cs.SI) </div> <p class='mathjax'> The history of artificial intelligence (AI) has witnessed the significant impact of high-quality data on various deep learning models, such as ImageNet for AlexNet and ResNet. 
Recently, instead of designing more complex neural architectures as model-centric approaches, the attention of AI community has shifted to data-centric ones, which focuses on better processing data to strengthen the ability of neural models. Graph learning, which operates on ubiquitous topological data, also plays an important role in the era of deep learning. In this survey, we comprehensively review graph learning approaches from the data-centric perspective, and aim to answer three crucial questions: (1) when to modify graph data, (2) what part of the graph data needs modification to unlock the potential of various graph models, and (3) how to safeguard graph models from problematic data influence. Accordingly, we propose a novel taxonomy based on the stages in the graph learning pipeline, and highlight the processing methods for different data structures in the graph data, i.e., topology, feature and label. Furthermore, we analyze some potential problems embedded in graph data and discuss how to solve them in a data-centric manner. Finally, we provide some promising future directions for data-centric graph learning. 
</p> </div> </dd> <dt> <a name='item447'>[447]</a> <a href ="/abs/2310.06313" title="Abstract" id="2310.06313"> arXiv:2310.06313 </a> (replaced) [<a href="/pdf/2310.06313" title="Download PDF" id="pdf-2310.06313" aria-labelledby="pdf-2310.06313">pdf</a>, <a href="https://arxiv.org/html/2310.06313v4" title="View HTML" id="html-2310.06313" aria-labelledby="html-2310.06313" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2310.06313" title="Other formats" id="oth-2310.06313" aria-labelledby="oth-2310.06313">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Advancing Pose-Guided Image Synthesis with Progressive Conditional Diffusion Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shen,+F">Fei Shen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ye,+H">Hu Ye</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+J">Jun Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+C">Cong Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Han,+X">Xiao Han</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+W">Wei Yang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to ICLR 2024. 
The final version is available at OpenReview: <a href="https://openreview.net/forum?id=rHzapPnCgT" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> The Twelfth International Conference on Learning Representations, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Recent work has showcased the significant potential of diffusion models in pose-guided person image synthesis. However, owing to the inconsistency in pose between the source and target images, synthesizing an image with a distinct pose, relying exclusively on the source image and target pose information, remains a formidable challenge. This paper presents Progressive Conditional Diffusion Models (PCDMs) that incrementally bridge the gap between person images under the target and source poses through three stages. Specifically, in the first stage, we design a simple prior conditional diffusion model that predicts the global features of the target image by mining the global alignment relationship between pose coordinates and image appearance. Then, the second stage establishes a dense correspondence between the source and target images using the global features from the previous stage, and an inpainting conditional diffusion model is proposed to further align and enhance the contextual features, generating a coarse-grained person image. In the third stage, we propose a refining conditional diffusion model to utilize the coarsely generated image from the previous stage as a condition, achieving texture restoration and enhancing fine-detail consistency. The three-stage PCDMs work progressively to generate the final high-quality and high-fidelity synthesized image. 
Both qualitative and quantitative results demonstrate the consistency and photorealism of our proposed PCDMs under challenging scenarios. The code and model will be available at <a href="https://github.com/tencent-ailab/PCDMs" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item448'>[448]</a> <a href ="/abs/2310.08944" title="Abstract" id="2310.08944"> arXiv:2310.08944 </a> (replaced) [<a href="/pdf/2310.08944" title="Download PDF" id="pdf-2310.08944" aria-labelledby="pdf-2310.08944">pdf</a>, <a href="https://arxiv.org/html/2310.08944v2" title="View HTML" id="html-2310.08944" aria-labelledby="html-2310.08944" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2310.08944" title="Other formats" id="oth-2310.08944" aria-labelledby="oth-2310.08944">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Confidence-based Acquisition Model for Self-supervised Active Learning and Label Correction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=van+Niekerk,+C">Carel van Niekerk</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Geishauser,+C">Christian Geishauser</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Heck,+M">Michael Heck</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Feng,+S">Shutong Feng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+H">Hsien-chin Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lubis,+N">Nurul Lubis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ruppik,+B">Benjamin Ruppik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vukovic,+R">Renato Vukovic</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Ga%C5%A1i%C4%87,+M">Milica Gašić</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at TACL </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Supervised neural approaches are hindered by their dependence on large, meticulously annotated datasets, a requirement that is particularly cumbersome for sequential tasks. The quality of annotations tends to deteriorate with the transition from expert-based to crowd-sourced labelling. To address these challenges, we present CAMEL (Confidence-based Acquisition Model for Efficient self-supervised active Learning), a pool-based active learning framework tailored to sequential multi-output problems. CAMEL possesses two core features: (1) it requires expert annotators to label only a fraction of a chosen sequence, and (2) it facilitates self-supervision for the remainder of the sequence. By deploying a label correction mechanism, CAMEL can also be utilised for data cleaning. We evaluate CAMEL on two sequential tasks, with a special emphasis on dialogue belief tracking, a task plagued by the constraints of limited and noisy datasets. Our experiments demonstrate that CAMEL significantly outperforms the baselines in terms of efficiency. Furthermore, the data corrections suggested by our method contribute to an overall improvement in the quality of the resulting datasets. 
</p> </div> </dd> <dt> <a name='item449'>[449]</a> <a href ="/abs/2311.00167" title="Abstract" id="2311.00167"> arXiv:2311.00167 </a> (replaced) [<a href="/pdf/2311.00167" title="Download PDF" id="pdf-2311.00167" aria-labelledby="pdf-2311.00167">pdf</a>, <a href="https://arxiv.org/html/2311.00167v2" title="View HTML" id="html-2311.00167" aria-labelledby="html-2311.00167" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2311.00167" title="Other formats" id="oth-2311.00167" aria-labelledby="oth-2311.00167">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hierarchical Information-sharing Convolutional Neural Network for the Prediction of Arctic Sea Ice Concentration and Velocity </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Koo,+Y">Younghyun Koo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rahnemoonfar,+M">Maryam Rahnemoonfar</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computer Vision and Pattern Recognition (cs.CV); Atmospheric and Oceanic Physics (physics.ao-ph) </div> <p class='mathjax'> Forecasting sea ice concentration (SIC) and sea ice velocity (SIV) in the Arctic Ocean is of great significance as the Arctic environment has been changed by the recent warming climate. Given that physical sea ice models require high computational costs with complex parameterization, deep learning techniques can effectively replace the physical model and improve the performance of sea ice prediction. This study proposes a novel multi-task fully convolutional network architecture named hierarchical information-sharing U-net (HIS-Unet) to predict daily SIC and SIV. 
Instead of learning SIC and SIV separately at each branch, we allow the SIC and SIV layers to share their information and assist each other's prediction through the weighting attention modules (WAMs). Consequently, our HIS-Unet outperforms other statistical approaches, sea ice physical models, and neural networks without such information-sharing units. The improvement of HIS-Unet is more significant to when and where SIC changes seasonally, which implies that the information sharing between SIC and SIV through WAMs helps learn the dynamic changes of SIC and SIV. The weight values of the WAMs imply that SIC information plays a more critical role in SIV prediction, compared to that of SIV information in SIC prediction, and information sharing is more active in marginal ice zones (e.g., East Greenland and Hudson/Baffin Bays) than in the central Arctic. </p> </div> </dd> <dt> <a name='item450'>[450]</a> <a href ="/abs/2311.00207" title="Abstract" id="2311.00207"> arXiv:2311.00207 </a> (replaced) [<a href="/pdf/2311.00207" title="Download PDF" id="pdf-2311.00207" aria-labelledby="pdf-2311.00207">pdf</a>, <a href="https://arxiv.org/html/2311.00207v3" title="View HTML" id="html-2311.00207" aria-labelledby="html-2311.00207" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2311.00207" title="Other formats" id="oth-2311.00207" aria-labelledby="oth-2311.00207">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Magmaw: Modality-Agnostic Adversarial Attacks on Machine Learning-Based Wireless Communication Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+J">Jung-Woo Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+K">Ke Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Heydaribeni,+N">Nasimeh Heydaribeni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hidano,+S">Seira 
Hidano</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+X">Xinyu Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Koushanfar,+F">Farinaz Koushanfar</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at NDSS 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Machine Learning (ML) has been instrumental in enabling joint transceiver optimization by merging all physical layer blocks of the end-to-end wireless communication systems. Although there have been a number of adversarial attacks on ML-based wireless systems, the existing methods do not provide a comprehensive view including multi-modality of the source data, common physical layer protocols, and wireless domain constraints. This paper proposes Magmaw, a novel wireless attack methodology capable of generating universal adversarial perturbations for any multimodal signal transmitted over a wireless channel. We further introduce new objectives for adversarial attacks on downstream applications. We adopt the widely-used defenses to verify the resilience of Magmaw. For proof-of-concept evaluation, we build a real-time wireless attack platform using a software-defined radio system. Experimental results demonstrate that Magmaw causes significant performance degradation even in the presence of strong defense mechanisms. Furthermore, we validate the performance of Magmaw in two case studies: encrypted communication channel and channel modality-based ML model. 
</p> </div> </dd> <dt> <a name='item451'>[451]</a> <a href ="/abs/2311.03478" title="Abstract" id="2311.03478"> arXiv:2311.03478 </a> (replaced) [<a href="/pdf/2311.03478" title="Download PDF" id="pdf-2311.03478" aria-labelledby="pdf-2311.03478">pdf</a>, <a href="https://arxiv.org/html/2311.03478v2" title="View HTML" id="html-2311.03478" aria-labelledby="html-2311.03478" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2311.03478" title="Other formats" id="oth-2311.03478" aria-labelledby="oth-2311.03478">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi Loss-based Feature Fusion and Top Two Voting Ensemble Decision Strategy for Facial Expression Recognition in the Wild </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+G">Guangyao Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+Y">Yuanlun Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+Y">Yiqin Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zhaokun Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 12 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Facial expression recognition (FER) in the wild is a challenging task affected by the image quality and has attracted broad interest in computer vision. There is no research using feature fusion and ensemble strategy for FER simultaneously. Different from previous studies, this paper applies both internal feature fusion for a single model and feature fusion among multiple networks, as well as the ensemble strategy. 
This paper proposes one novel single model named R18+FAML, as well as one ensemble model named R18+FAML-FGA-T2V to improve the performance of the FER in the wild. Based on the structure of ResNet18 (R18), R18+FAML combines internal Feature fusion and three Attention blocks using Multiple Loss functions (FAML) to improve the diversity of the feature extraction. To improve the performance of R18+FAML, we propose a Feature fusion among networks based on the Genetic Algorithm (FGA), which can fuse the convolution kernels for feature extraction of multiple networks. On the basis of R18+FAML and FGA, we propose one ensemble strategy, i.e., the Top Two Voting (T2V) to support the classification of FER, which can consider more classification information comprehensively. Combining the above strategies, R18+FAML-FGA-T2V can focus on the main expression-aware areas. Extensive experiments demonstrate that our single model R18+FAML and the ensemble model R18+FAML-FGA-T2V achieve the accuracies of $\left( 90.32, 62.17, 65.83 \right)\%$ and $\left( 91.59, 63.27, 66.63 \right)\%$ on three challenging unbalanced FER datasets RAF-DB, AffectNet-8 and AffectNet-7 respectively, both outperforming the state-of-the-art results. 
</p> </div> </dd> <dt> <a name='item452'>[452]</a> <a href ="/abs/2311.13159" title="Abstract" id="2311.13159"> arXiv:2311.13159 </a> (replaced) [<a href="/pdf/2311.13159" title="Download PDF" id="pdf-2311.13159" aria-labelledby="pdf-2311.13159">pdf</a>, <a href="https://arxiv.org/html/2311.13159v2" title="View HTML" id="html-2311.13159" aria-labelledby="html-2311.13159" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2311.13159" title="Other formats" id="oth-2311.13159" aria-labelledby="oth-2311.13159">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi-Objective Optimization via Wasserstein-Fisher-Rao Gradient Flow </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ren,+Y">Yinuo Ren</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiao,+T">Tesi Xiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gangwani,+T">Tanmay Gangwani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rangi,+A">Anshuka Rangi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rahmanian,+H">Holakou Rahmanian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ying,+L">Lexing Ying</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sanyal,+S">Subhajit Sanyal</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Optimization and Control (math.OC); Machine Learning (stat.ML) </div> <p class='mathjax'> Multi-objective optimization (MOO) aims to optimize multiple, possibly conflicting objectives with widespread applications. We introduce a novel interacting particle method for MOO inspired by molecular dynamics simulations. Our approach combines overdamped Langevin and birth-death dynamics, incorporating a "dominance potential" to steer particles toward global Pareto optimality. 
In contrast to previous methods, our method is able to relocate dominated particles, making it particularly adept at managing Pareto fronts of complicated geometries. Our method is also theoretically grounded as a Wasserstein-Fisher-Rao gradient flow with convergence guarantees. Extensive experiments confirm that our approach outperforms state-of-the-art methods on challenging synthetic and real-world datasets. </p> </div> </dd> <dt> <a name='item453'>[453]</a> <a href ="/abs/2311.15414" title="Abstract" id="2311.15414"> arXiv:2311.15414 </a> (replaced) [<a href="/pdf/2311.15414" title="Download PDF" id="pdf-2311.15414" aria-labelledby="pdf-2311.15414">pdf</a>, <a href="https://arxiv.org/html/2311.15414v3" title="View HTML" id="html-2311.15414" aria-labelledby="html-2311.15414" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2311.15414" title="Other formats" id="oth-2311.15414" aria-labelledby="oth-2311.15414">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> KOPPA: Improving Prompt-based Continual Learning with Key-Query Orthogonal Projection and Prototype-based One-Versus-All </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tran,+Q">Quyen Tran</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Phan,+H">Hoang Phan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tran,+L">Lam Tran</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Than,+K">Khoat Than</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tran,+T">Toan Tran</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Phung,+D">Dinh Phung</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Le,+T">Trung Le</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> 
<p class='mathjax'> Drawing inspiration from prompt tuning techniques applied to Large Language Models, recent methods based on pre-trained ViT networks have achieved remarkable results in the field of Continual Learning. Specifically, these approaches propose to maintain a set of prompts and allocate a subset of them to learn each task using a key-query matching strategy. However, they may encounter limitations when lacking control over the correlations between old task queries and keys of future tasks, the shift of features in the latent space, and the relative separation of latent vectors learned in independent tasks. In this work, we introduce a novel key-query learning strategy based on orthogonal projection, inspired by model-agnostic meta-learning, to enhance prompt matching efficiency and address the challenge of shifting features. Furthermore, we introduce a One-Versus-All (OVA) prototype-based component that enhances the classification head distinction. Experimental results on benchmark datasets demonstrate that our method empowers the model to achieve results surpassing those of current state-of-the-art approaches by a large margin of up to 20%. 
</p> </div> </dd> <dt> <a name='item454'>[454]</a> <a href ="/abs/2311.15864" title="Abstract" id="2311.15864"> arXiv:2311.15864 </a> (replaced) [<a href="/pdf/2311.15864" title="Download PDF" id="pdf-2311.15864" aria-labelledby="pdf-2311.15864">pdf</a>, <a href="https://arxiv.org/html/2311.15864v4" title="View HTML" id="html-2311.15864" aria-labelledby="html-2311.15864" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2311.15864" title="Other formats" id="oth-2311.15864" aria-labelledby="oth-2311.15864">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> InterControl: Zero-shot Human Interaction Generation by Controlling Every Joint </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zhenzhi Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+J">Jingbo Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yixuan Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+D">Dahua Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dai,+B">Bo Dai</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> NeurIPS 2024 camera ready version. TL;DR: Generate human interactions with only single-person motion data in training via joint contact pairs from LLMs </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Text-conditioned motion synthesis has made remarkable progress with the emergence of diffusion models. However, the majority of these motion diffusion models are primarily designed for a single character and overlook multi-human interactions. 
In our approach, we strive to explore this problem by synthesizing human motion with interactions for a group of characters of any size in a zero-shot manner. The key aspect of our approach is the adaptation of human-wise interactions as pairs of human joints that can be either in contact or separated by a desired distance. In contrast to existing methods that necessitate training motion generation models on multi-human motion datasets with a fixed number of characters, our approach inherently possesses the flexibility to model human interactions involving an arbitrary number of individuals, thereby transcending the limitations imposed by the training data. We introduce a novel controllable motion generation method, InterControl, to encourage the synthesized motions maintaining the desired distance between joint pairs. It consists of a motion controller and an inverse kinematics guidance module that realistically and accurately aligns the joints of synthesized characters to the desired location. Furthermore, we demonstrate that the distance between joint pairs for human-wise interactions can be generated using an off-the-shelf Large Language Model (LLM). Experimental results highlight the capability of our framework to generate interactions with multiple human characters and its potential to work with off-the-shelf physics-based character simulators. 
Code is available at <a href="https://github.com/zhenzhiwang/intercontrol" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item455'>[455]</a> <a href ="/abs/2311.16141" title="Abstract" id="2311.16141"> arXiv:2311.16141 </a> (replaced) [<a href="/pdf/2311.16141" title="Download PDF" id="pdf-2311.16141" aria-labelledby="pdf-2311.16141">pdf</a>, <a href="https://arxiv.org/html/2311.16141v3" title="View HTML" id="html-2311.16141" aria-labelledby="html-2311.16141" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2311.16141" title="Other formats" id="oth-2311.16141" aria-labelledby="oth-2311.16141">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Brain-Inspired Efficient Pruning: Exploiting Criticality in Spiking Neural Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+S">Shuo Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+B">Boxiao Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zeshi Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=You,+H">Haihang You</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Neural and Evolutionary Computing (cs.NE)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Spiking Neural Networks (SNNs) have gained significant attention due to the energy-efficient and multiplication-free characteristics. Despite these advantages, deploying large-scale SNNs on edge hardware is challenging due to limited resource availability. Network pruning offers a viable approach to compress the network scale and reduce hardware resource requirements for model deployment. 
However, existing SNN pruning methods cause high pruning costs and performance loss because they lack efficiency in processing the sparse spike representation of SNNs. In this paper, inspired by the critical brain hypothesis in neuroscience and the high biological plausibility of SNNs, we explore and leverage criticality to facilitate efficient pruning in deep SNNs. We first explain criticality in SNNs from the perspective of maximizing feature information entropy. Second, we propose a low-cost metric for assessing neuron criticality in feature transmission and design a pruning-regeneration method that incorporates this criticality into the pruning process. Experimental results demonstrate that our method achieves higher performance than the current state-of-the-art (SOTA) method with up to 95.26\% reduction of pruning cost. The criticality-based regeneration process efficiently selects potential structures and facilitates consistent feature representation. </p> </div> </dd> <dt> <a name='item456'>[456]</a> <a href ="/abs/2312.00236" title="Abstract" id="2312.00236"> arXiv:2312.00236 </a> (replaced) [<a href="/pdf/2312.00236" title="Download PDF" id="pdf-2312.00236" aria-labelledby="pdf-2312.00236">pdf</a>, <a href="https://arxiv.org/html/2312.00236v3" title="View HTML" id="html-2312.00236" aria-labelledby="html-2312.00236" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2312.00236" title="Other formats" id="oth-2312.00236" aria-labelledby="oth-2312.00236">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Brainformer: Mimic Human Visual Brain Functions to Machine Vision Models via fMRI </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+X">Xuan-Bac Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xin Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sinha,+P">Pawan Sinha</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Khan,+S+U">Samee U. Khan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luu,+K">Khoa Luu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Human perception plays a vital role in forming beliefs and understanding reality. A deeper understanding of brain functionality will lead to the development of novel deep neural networks. In this work, we introduce a novel framework named Brainformer, a straightforward yet effective Transformer-based framework, to analyze Functional Magnetic Resonance Imaging (fMRI) patterns in the human perception system from a machine-learning perspective. Specifically, we present the Multi-scale fMRI Transformer to explore brain activity patterns through fMRI signals. This architecture includes a simple yet efficient module for high-dimensional fMRI signal encoding and incorporates a novel embedding technique called 3D Voxels Embedding. Secondly, drawing inspiration from the functionality of the brain's Region of Interest, we introduce a novel loss function called Brain fMRI Guidance Loss. This loss function mimics brain activity patterns from these regions in the deep neural network using fMRI data. This work introduces a prospective approach to transferring knowledge from human perception to neural networks. Our experiments demonstrate that leveraging fMRI information allows the machine vision model to achieve results comparable to State-of-the-Art methods in various image recognition tasks. 
</p> </div> </dd> <dt> <a name='item457'>[457]</a> <a href ="/abs/2312.02124" title="Abstract" id="2312.02124"> arXiv:2312.02124 </a> (replaced) [<a href="/pdf/2312.02124" title="Download PDF" id="pdf-2312.02124" aria-labelledby="pdf-2312.02124">pdf</a>, <a href="https://arxiv.org/html/2312.02124v2" title="View HTML" id="html-2312.02124" aria-labelledby="html-2312.02124" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2312.02124" title="Other formats" id="oth-2312.02124" aria-labelledby="oth-2312.02124">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> VerA: Versatile Anonymization Applicable to Clinical Facial Photographs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Helou,+M+E">Majed El Helou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cetin,+D">Doruk Cetin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stamenkovic,+P">Petar Stamenkovic</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huber,+N+B">Niko Benjamin Huber</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Z%C3%BCnd,+F">Fabio Zünd</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> accepted to WACV 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Graphics (cs.GR); Machine Learning (cs.LG) </div> <p class='mathjax'> The demand for privacy in facial image dissemination is gaining ground internationally, echoed by the proliferation of regulations such as GDPR, DPDPA, CCPA, PIPL, and APPI. While recent advances in anonymization surpass pixelation or blur methods, additional constraints to the task pose challenges. 
Largely unaddressed by current anonymization methods are clinical images and pairs of before-and-after clinical images illustrating facial medical interventions, e.g., facial surgeries or dental procedures. We present VerA, the first Versatile Anonymization framework that solves two challenges in clinical applications: A) it preserves selected semantic areas (e.g., mouth region) to show medical intervention results, that is, anonymization is only applied to the areas outside the preserved area; and B) it produces anonymized images with consistent personal identity across multiple photographs, which is crucial for anonymizing photographs of the same person taken before and after a clinical intervention. We validate our results on both single and paired anonymization of clinical images through extensive quantitative and qualitative evaluation. We also demonstrate that VerA reaches the state of the art on established anonymization tasks, in terms of photorealism and de-identification. </p> </div> </dd> <dt> <a name='item458'>[458]</a> <a href ="/abs/2312.02611" title="Abstract" id="2312.02611"> arXiv:2312.02611 </a> (replaced) [<a href="/pdf/2312.02611" title="Download PDF" id="pdf-2312.02611" aria-labelledby="pdf-2312.02611">pdf</a>, <a href="/format/2312.02611" title="Other formats" id="oth-2312.02611" aria-labelledby="oth-2312.02611">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Privacy-Aware Data Acquisition under Data Similarity in Regression Markets </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pandey,+S+R">Shashi Raj Pandey</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pinson,+P">Pierre Pinson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Popovski,+P">Petar Popovski</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to IEEE Transactions on Neural Networks and 
Learning Systems </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Cryptography and Security (cs.CR); Computer Science and Game Theory (cs.GT) </div> <p class='mathjax'> Data markets facilitate decentralized data exchange for applications such as prediction, learning, or inference. The design of these markets is challenged by varying privacy preferences as well as data similarity among data owners. Related works have often overlooked how data similarity impacts pricing and data value through statistical information leakage. We demonstrate that data similarity and privacy preferences are integral to market design and propose a query-response protocol using local differential privacy for a two-party data acquisition mechanism. In our regression data market model, we analyze strategic interactions between privacy-aware owners and the learner as a Stackelberg game over the asked price and privacy factor. Finally, we numerically evaluate how data similarity affects market participation and traded data value. 
</p> </div> </dd> <dt> <a name='item459'>[459]</a> <a href ="/abs/2312.02783" title="Abstract" id="2312.02783"> arXiv:2312.02783 </a> (replaced) [<a href="/pdf/2312.02783" title="Download PDF" id="pdf-2312.02783" aria-labelledby="pdf-2312.02783">pdf</a>, <a href="https://arxiv.org/html/2312.02783v4" title="View HTML" id="html-2312.02783" aria-labelledby="html-2312.02783" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2312.02783" title="Other formats" id="oth-2312.02783" aria-labelledby="oth-2312.02783">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Large Language Models on Graphs: A Comprehensive Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+B">Bowen Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+G">Gang Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Han,+C">Chi Han</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+M">Meng Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ji,+H">Heng Ji</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Han,+J">Jiawei Han</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 25 pages </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Transactions on Knowledge and Data Engineering (TKDE) 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Large language models (LLMs), such as GPT4 and LLaMA, are creating significant advancements in natural language processing, due to their strong text encoding/decoding ability and newly found emergent capability (e.g., reasoning). 
While LLMs are mainly designed to process pure texts, there are many real-world scenarios where text data is associated with rich structure information in the form of graphs (e.g., academic networks, and e-commerce networks) or scenarios where graph data is paired with rich textual information (e.g., molecules with descriptions). Besides, although LLMs have shown their pure text-based reasoning ability, it is underexplored whether such ability can be generalized to graphs (i.e., graph-based reasoning). In this paper, we provide a systematic review of scenarios and techniques related to large language models on graphs. We first summarize potential scenarios of adopting LLMs on graphs into three categories, namely pure graphs, text-attributed graphs, and text-paired graphs. We then discuss detailed techniques for utilizing LLMs on graphs, including LLM as Predictor, LLM as Encoder, and LLM as Aligner, and compare the advantages and disadvantages of different schools of models. Furthermore, we discuss the real-world applications of such methods and summarize open-source codes and benchmark datasets. Finally, we conclude with potential future research directions in this fast-growing field. The related source can be found at <a href="https://github.com/PeterGriffinJin/Awesome-Language-Model-on-Graphs" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item460'>[460]</a> <a href ="/abs/2312.11122" title="Abstract" id="2312.11122"> arXiv:2312.11122 </a> (replaced) [<a href="/pdf/2312.11122" title="Download PDF" id="pdf-2312.11122" aria-labelledby="pdf-2312.11122">pdf</a>, <a href="https://arxiv.org/html/2312.11122v3" title="View HTML" id="html-2312.11122" aria-labelledby="html-2312.11122" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2312.11122" title="Other formats" id="oth-2312.11122" aria-labelledby="oth-2312.11122">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Evaluation of Dataframe Libraries for Data Preparation on a Single Machine </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Mozzillo,+A">Angelo Mozzillo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zecchini,+L">Luca Zecchini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gagliardelli,+L">Luca Gagliardelli</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Aslam,+A">Adeel Aslam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bergamaschi,+S">Sonia Bergamaschi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Simonini,+G">Giovanni Simonini</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Proceedings 28th International Conference on Extending Database Technology, EDBT 2025, Barcelona, Spain, March 25-28, 2025 (pp. 337-349) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span> </div> <p class='mathjax'> Data preparation is a trial-and-error process that typically involves countless iterations over the data to define the best pipeline of operators for a given task. 
With tabular data, practitioners often perform that burdensome activity on local machines by writing ad hoc scripts with libraries based on the Pandas dataframe API and testing them on samples of the entire dataset-the faster the library, the less idle time its users have. <br>In this paper, we evaluate the most popular Python dataframe libraries in general data preparation use cases to assess how they perform on a single machine. To do so, we employ 4 real-world datasets with heterogeneous features, covering a variety of scenarios, and the TPC-H benchmark. The insights gained with this experimentation are useful to data scientists who need to choose which of the dataframe libraries best suits their data preparation task at hand. <br>In a nutshell, we found that: for small datasets, Pandas consistently proves to be the best choice with the richest API; when data fits in RAM and there is no need for complete compatibility with Pandas API, Polars is the go-to choice thanks to its in-memory execution and query optimizations; when a GPU is available, CuDF often yields the best performance, while for very large datasets that cannot fit in the GPU memory and RAM, PySpark (thanks to a multithread execution and a query optimizer) proves to be the best option. 
</p> </div> </dd> <dt> <a name='item461'>[461]</a> <a href ="/abs/2401.01127" title="Abstract" id="2401.01127"> arXiv:2401.01127 </a> (replaced) [<a href="/pdf/2401.01127" title="Download PDF" id="pdf-2401.01127" aria-labelledby="pdf-2401.01127">pdf</a>, <a href="https://arxiv.org/html/2401.01127v5" title="View HTML" id="html-2401.01127" aria-labelledby="html-2401.01127" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2401.01127" title="Other formats" id="oth-2401.01127" aria-labelledby="oth-2401.01127">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Wireless 6G Connectivity for Massive Number of Devices and Critical Services </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kal%C3%B8r,+A+E">Anders E. Kalør</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Durisi,+G">Giuseppe Durisi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Coleri,+S">Sinem Coleri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Parkvall,+S">Stefan Parkvall</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+W">Wei Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mueller,+A">Andreas Mueller</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Popovski,+P">Petar Popovski</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to Proceedings of the IEEE. 19 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span> </div> <p class='mathjax'> Compared to the generations up to 4G, whose main focus was on broadband and coverage aspects, 5G has expanded the scope of wireless cellular systems towards embracing two new types of connectivity: massive machine-type communication (mMTC) and ultra-reliable low-latency communications (URLLC). 
This paper discusses the possible evolution of these two types of connectivity within the umbrella of 6G wireless systems. The paper consists of three parts. The first part deals with the connectivity for a massive number of devices. While mMTC research in 5G predominantly focuses on the problem of uncoordinated access in the uplink for a large number of devices, the traffic patterns in 6G may become more symmetric, leading to closed-loop massive connectivity. One of the drivers for this is distributed learning/inference. The second part of the paper discusses the evolution of wireless connectivity for critical services. While latency and reliability are tightly coupled in 5G, 6G will support a variety of safety critical control applications with different types of timing requirements, as evidenced by the emergence of metrics related to information freshness and information value. Additionally, ensuring ultra-high reliability for safety critical control applications requires modeling and estimation of the tail statistics of the wireless channel, queue length, and delay. The fulfillment of these stringent requirements calls for the development of novel AI-based techniques, incorporating optimization theory, explainable AI, generative AI and digital twins. The third part analyzes the coexistence of massive connectivity and critical services. We will consider scenarios in which a massive number of devices need to support traffic patterns of mixed criticality. This is followed by a discussion about the management of wireless resources shared by services with different criticality. 
</p> </div> </dd> <dt> <a name='item462'>[462]</a> <a href ="/abs/2401.10787" title="Abstract" id="2401.10787"> arXiv:2401.10787 </a> (replaced) [<a href="/pdf/2401.10787" title="Download PDF" id="pdf-2401.10787" aria-labelledby="pdf-2401.10787">pdf</a>, <a href="/format/2401.10787" title="Other formats" id="oth-2401.10787" aria-labelledby="oth-2401.10787">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hybrid Online Certificate Status Protocol with Certificate Revocation List for Smart Grid Public Key Infrastructure </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+H">Hong-Sheng Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+Z">Zhe-Yi Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+H">Hsuan-Tung Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+H">Hung-Min Sun</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 7 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span> </div> <p class='mathjax'> Hsu et al. (2022) proposed a cryptographic scheme within the public key infrastructure to bolster the security of smart grid meters. Their proposal involved developing the Certificate Management over CMS mechanism to establish Simple Certificate Enrollment Protocol and Enrollment over Secure Transport protocol. Additionally, they implemented Online Certificate Status Protocol (OCSP) services to independently query the status of certificates. However, their implementation featured a single OCSP server handling all query requests. Considering the typical scenario in smart grid PKI environments with over tens of thousands of end-meters, we introduced a Hybrid Online Certificate Status Protocol mechanism. 
This approach decreases demand of query resources from the client to OCSP servers collaborating with Certificate Revocation Lists. Our simulations, mimicking meter behavior, demonstrated increased efficiency, creating a more robust architecture tailored to the smart grid meter landscape. </p> </div> </dd> <dt> <a name='item463'>[463]</a> <a href ="/abs/2401.11374" title="Abstract" id="2401.11374"> arXiv:2401.11374 </a> (replaced) [<a href="/pdf/2401.11374" title="Download PDF" id="pdf-2401.11374" aria-labelledby="pdf-2401.11374">pdf</a>, <a href="https://arxiv.org/html/2401.11374v4" title="View HTML" id="html-2401.11374" aria-labelledby="html-2401.11374" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2401.11374" title="Other formats" id="oth-2401.11374" aria-labelledby="oth-2401.11374">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Language Models as Hierarchy Encoders </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=He,+Y">Yuan He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yuan,+Z">Zhangdie Yuan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jiaoyan Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Horrocks,+I">Ian Horrocks</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accept at NeurIPS 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Interpreting hierarchical structures latent in language is a key limitation of current language models (LMs). While previous research has implicitly leveraged these hierarchies to enhance LMs, approaches for their explicit encoding are yet to be explored. 
To address this, we introduce a novel approach to re-train transformer encoder-based LMs as Hierarchy Transformer encoders (HiTs), harnessing the expansive nature of hyperbolic space. Our method situates the output embedding space of pre-trained LMs within a Poincaré ball with a curvature that adapts to the embedding dimension, followed by training on hyperbolic clustering and centripetal losses. These losses are designed to effectively cluster related entities (input as texts) and organise them hierarchically. We evaluate HiTs against pre-trained LMs, standard fine-tuned LMs, and several hyperbolic embedding baselines, focusing on their capabilities in simulating transitive inference, predicting subsumptions, and transferring knowledge across hierarchies. The results demonstrate that HiTs consistently outperform all baselines in these tasks, underscoring the effectiveness and transferability of our re-trained hierarchy encoders. </p> </div> </dd> <dt> <a name='item464'>[464]</a> <a href ="/abs/2401.13721" title="Abstract" id="2401.13721"> arXiv:2401.13721 </a> (replaced) [<a href="/pdf/2401.13721" title="Download PDF" id="pdf-2401.13721" aria-labelledby="pdf-2401.13721">pdf</a>, <a href="https://arxiv.org/html/2401.13721v3" title="View HTML" id="html-2401.13721" aria-labelledby="html-2401.13721" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2401.13721" title="Other formats" id="oth-2401.13721" aria-labelledby="oth-2401.13721">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Uncertainty-Guided Alignment for Unsupervised Domain Adaptation in Regression </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nejjar,+I">Ismail Nejjar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Frusque,+G">Gaetan Frusque</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Forest,+F">Florent Forest</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Fink,+O">Olga Fink</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Unsupervised Domain Adaptation for Regression (UDAR) aims to adapt models from a labeled source domain to an unlabeled target domain for regression tasks. Traditional feature alignment methods, successful in classification, often prove ineffective for regression due to the correlated nature of regression features. To address this challenge, we propose Uncertainty-Guided Alignment (UGA), a novel method that integrates predictive uncertainty into the feature alignment process. UGA employs Evidential Deep Learning to predict both target values and their associated uncertainties. This uncertainty information guides the alignment process and fuses information within the embedding space, effectively mitigating issues such as feature collapse in out-of-distribution scenarios. We evaluate UGA on two computer vision benchmarks and a real-world battery state-of-charge prediction across different manufacturers and operating temperatures. Across 52 transfer tasks, UGA on average outperforms existing state-of-the-art methods. Our approach not only improves adaptation performance but also provides well-calibrated uncertainty estimates. 
</p> </div> </dd> <dt> <a name='item465'>[465]</a> <a href ="/abs/2402.00712" title="Abstract" id="2402.00712"> arXiv:2402.00712 </a> (replaced) [<a href="/pdf/2402.00712" title="Download PDF" id="pdf-2402.00712" aria-labelledby="pdf-2402.00712">pdf</a>, <a href="https://arxiv.org/html/2402.00712v5" title="View HTML" id="html-2402.00712" aria-labelledby="html-2402.00712" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.00712" title="Other formats" id="oth-2402.00712" aria-labelledby="oth-2402.00712">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ChaosBench: A Multi-Channel, Physics-Based Benchmark for Subseasonal-to-Seasonal Climate Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nathaniel,+J">Juan Nathaniel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qu,+Y">Yongquan Qu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+T">Tung Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+S">Sungduk Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Busecke,+J">Julius Busecke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Grover,+A">Aditya Grover</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gentine,+P">Pierre Gentine</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> NeurIPS 2024 D&B Track (Oral) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Computers and Society (cs.CY) </div> <p class='mathjax'> Accurate prediction of climate in the subseasonal-to-seasonal scale is crucial for disaster preparedness and robust decision making amidst climate change. 
Yet, forecasting beyond the weather timescale is challenging because it deals with problems other than initial condition, including boundary interaction, butterfly effect, and our inherent lack of physical understanding. At present, existing benchmarks tend to have shorter forecasting range of up to 15 days, do not include a wide range of operational baselines, and lack physics-based constraints for explainability. Thus, we propose ChaosBench, a challenging benchmark to extend the predictability range of data-driven weather emulators to S2S timescale. First, ChaosBench is comprised of variables beyond the typical surface-atmospheric ERA5 to also include ocean, ice, and land reanalysis products that span over 45 years to allow for full Earth system emulation that respects boundary conditions. We also propose physics-based, in addition to deterministic and probabilistic metrics, to ensure a physically-consistent ensemble that accounts for butterfly effect. Furthermore, we evaluate on a diverse set of physics-based forecasts from four national weather agencies as baselines to our data-driven counterpart such as ViT/ClimaX, PanguWeather, GraphCast, and FourCastNetV2. Overall, we find methods originally developed for weather-scale applications fail on S2S task: their performance simply collapses to an unskilled climatology. Nonetheless, we outline and demonstrate several strategies that can extend the predictability range of existing weather emulators, including the use of ensembles, robust control of error propagation, and the use of physics-informed models. Our benchmark, datasets, and instructions are available at <a href="https://leap-stc.github.io/ChaosBench" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item466'>[466]</a> <a href ="/abs/2402.01002" title="Abstract" id="2402.01002"> arXiv:2402.01002 </a> (replaced) [<a href="/pdf/2402.01002" title="Download PDF" id="pdf-2402.01002" aria-labelledby="pdf-2402.01002">pdf</a>, <a href="https://arxiv.org/html/2402.01002v3" title="View HTML" id="html-2402.01002" aria-labelledby="html-2402.01002" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.01002" title="Other formats" id="oth-2402.01002" aria-labelledby="oth-2402.01002">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> AI-generated faces influence gender stereotypes and racial homogenization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=AlDahoul,+N">Nouar AlDahoul</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rahwan,+T">Talal Rahwan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zaki,+Y">Yasir Zaki</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 47 pages, 19 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Text-to-image generative AI models such as Stable Diffusion are used daily by millions worldwide. However, the extent to which these models exhibit racial and gender stereotypes is not yet fully understood. Here, we document significant biases in Stable Diffusion across six races, two genders, 32 professions, and eight attributes. Additionally, we examine the degree to which Stable Diffusion depicts individuals of the same race as being similar to one another. This analysis reveals significant racial homogenization, e.g., depicting nearly all Middle Eastern men as bearded, brown-skinned, and wearing traditional attire. 
We then propose debiasing solutions that allow users to specify the desired distributions of race and gender when generating images while minimizing racial homogenization. Finally, using a preregistered survey experiment, we find evidence that being presented with inclusive AI-generated faces reduces people's racial and gender biases, while being presented with non-inclusive ones increases such biases, regardless of whether the images are labeled as AI-generated. Taken together, our findings emphasize the need to address biases and stereotypes in text-to-image models. </p> </div> </dd> <dt> <a name='item467'>[467]</a> <a href ="/abs/2402.03818" title="Abstract" id="2402.03818"> arXiv:2402.03818 </a> (replaced) [<a href="/pdf/2402.03818" title="Download PDF" id="pdf-2402.03818" aria-labelledby="pdf-2402.03818">pdf</a>, <a href="https://arxiv.org/html/2402.03818v3" title="View HTML" id="html-2402.03818" aria-labelledby="html-2402.03818" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.03818" title="Other formats" id="oth-2402.03818" aria-labelledby="oth-2402.03818">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Asymptotic generalization error of a single-layer graph convolutional network </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Duranthon,+O">O. Duranthon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zdeborov%C3%A1,+L">L. 
Zdeborová</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Proceedings of the Third Learning on Graphs Conference (LoG 2024), PMLR 269 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Disordered Systems and Neural Networks (cond-mat.dis-nn) </div> <p class='mathjax'> While graph convolutional networks show great practical promises, the theoretical understanding of their generalization properties as a function of the number of samples is still in its infancy compared to the more broadly studied case of supervised fully connected neural networks. In this article, we predict the performances of a single-layer graph convolutional network (GCN) trained on data produced by attributed stochastic block models (SBMs) in the high-dimensional limit. Previously, only ridge regression on contextual-SBM (CSBM) has been considered in Shi et al. 2022; we generalize the analysis to arbitrary convex loss and regularization for the CSBM and add the analysis for another data model, the neural-prior SBM. We also study the high signal-to-noise ratio limit, detail the convergence rates of the GCN and show that, while consistent, it does not reach the Bayes-optimal rate for any of the considered cases. 
</p> </div> </dd> <dt> <a name='item468'>[468]</a> <a href ="/abs/2402.04032" title="Abstract" id="2402.04032"> arXiv:2402.04032 </a> (replaced) [<a href="/pdf/2402.04032" title="Download PDF" id="pdf-2402.04032" aria-labelledby="pdf-2402.04032">pdf</a>, <a href="https://arxiv.org/html/2402.04032v5" title="View HTML" id="html-2402.04032" aria-labelledby="html-2402.04032" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.04032" title="Other formats" id="oth-2402.04032" aria-labelledby="oth-2402.04032">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ProactivePIM: Accelerating Weight-Sharing Embedding Layer with PIM for Scalable Recommendation System </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+Y">Youngsuk Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lim,+J">Junghwan Lim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+H">Hyuk-Jae Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rhee,+C+E">Chae Eun Rhee</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 9 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Hardware Architecture (cs.AR)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The model size growth of personalized recommendation systems poses new challenges for inference. Weight-sharing algorithms have been proposed for size reduction, but they increase memory access. Recent advancements in processing-in-memory (PIM) enhanced the model throughput by exploiting memory parallelism, but such algorithms introduce massive CPU-PIM communication into prior PIM systems. We propose ProactivePIM, a PIM system for weight-sharing recommendation system acceleration. 
ProactivePIM integrates a cache within the PIM with a prefetching scheme to leverage a unique locality of the algorithm and eliminate communication overhead through a subtable mapping strategy. ProactivePIM achieves a 4.8x speedup compared to prior works. </p> </div> </dd> <dt> <a name='item469'>[469]</a> <a href ="/abs/2402.04830" title="Abstract" id="2402.04830"> arXiv:2402.04830 </a> (replaced) [<a href="/pdf/2402.04830" title="Download PDF" id="pdf-2402.04830" aria-labelledby="pdf-2402.04830">pdf</a>, <a href="https://arxiv.org/html/2402.04830v5" title="View HTML" id="html-2402.04830" aria-labelledby="html-2402.04830" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.04830" title="Other formats" id="oth-2402.04830" aria-labelledby="oth-2402.04830">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Closing the Gap Between SGP4 and High-Precision Propagation via Differentiable Programming </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Acciarini,+G">Giacomo Acciarini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Baydin,+A+G">Atılım Güneş Baydin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Izzo,+D">Dario Izzo</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Acta Astronautica 226(1) (2025) 8 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Earth and Planetary Astrophysics (astro-ph.EP) </div> <p class='mathjax'> The Simplified General Perturbations 4 (SGP4) orbital propagation method is widely used for predicting the positions and velocities of Earth-orbiting objects rapidly and reliably. Despite continuous refinement, SGP models still lack the precision of numerical propagators, which offer significantly smaller errors. 
This study presents dSGP4, a novel differentiable version of SGP4 implemented using PyTorch. By making SGP4 differentiable, dSGP4 facilitates various space-related applications, including spacecraft orbit determination, state conversion, covariance transformation, state transition matrix computation, and covariance propagation. Additionally, dSGP4's PyTorch implementation allows for embarrassingly parallel orbital propagation across batches of Two-Line Element Sets (TLEs), leveraging the computational power of CPUs, GPUs, and advanced hardware for distributed prediction of satellite positions at future times. Furthermore, dSGP4's differentiability enables integration with modern machine learning techniques. Thus, we propose a novel orbital propagation paradigm, ML-dSGP4, where neural networks are integrated into the orbital propagator. Through stochastic gradient descent, this combined model's inputs, outputs, and parameters can be iteratively refined, surpassing SGP4's precision. Neural networks act as identity operators by default, adhering to SGP4's behavior. However, dSGP4's differentiability allows fine-tuning with ephemeris data, enhancing precision while maintaining computational speed. This empowers satellite operators and researchers to train the model using specific ephemeris or high-precision numerical propagation data, significantly advancing orbital prediction capabilities. 
</p> </div> </dd> <dt> <a name='item470'>[470]</a> <a href ="/abs/2402.04838" title="Abstract" id="2402.04838"> arXiv:2402.04838 </a> (replaced) [<a href="/pdf/2402.04838" title="Download PDF" id="pdf-2402.04838" aria-labelledby="pdf-2402.04838">pdf</a>, <a href="https://arxiv.org/html/2402.04838v5" title="View HTML" id="html-2402.04838" aria-labelledby="html-2402.04838" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.04838" title="Other formats" id="oth-2402.04838" aria-labelledby="oth-2402.04838">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PaDeLLM-NER: Parallel Decoding in Large Language Models for Named Entity Recognition </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+J">Jinghui Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Z">Ziwei Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yanjie Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+X">Xuejing Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mac+Namee,+B">Brian Mac Namee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+C">Can Huang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to NeurIPS 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> In this study, we aim to reduce generation latency for Named Entity Recognition (NER) with Large Language Models (LLMs). The main cause of high latency in LLMs is the sequential decoding process, which autoregressively generates all labels and mentions for NER, significantly increasing the sequence length. 
To this end, we introduce Parallel Decoding in LLM for NER (PaDeLLM-NER), an approach that integrates seamlessly into existing generative model frameworks without necessitating additional modules or architectural modifications. PaDeLLM-NER allows for the simultaneous decoding of all mentions, thereby reducing generation latency. Experiments reveal that PaDeLLM-NER significantly increases inference speed that is 1.76 to 10.22 times faster than the autoregressive approach for both English and Chinese. Simultaneously it maintains the quality of predictions as evidenced by the performance that is on par with the state-of-the-art across various datasets. </p> </div> </dd> <dt> <a name='item471'>[471]</a> <a href ="/abs/2402.05033" title="Abstract" id="2402.05033"> arXiv:2402.05033 </a> (replaced) [<a href="/pdf/2402.05033" title="Download PDF" id="pdf-2402.05033" aria-labelledby="pdf-2402.05033">pdf</a>, <a href="https://arxiv.org/html/2402.05033v2" title="View HTML" id="html-2402.05033" aria-labelledby="html-2402.05033" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.05033" title="Other formats" id="oth-2402.05033" aria-labelledby="oth-2402.05033">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Majority Kernels: An Approach to Leverage Big Model Dynamics for Efficient Small Model Training </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Mazzawi,+H">Hanna Mazzawi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Awasthi,+P">Pranjal Awasthi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gonzalvo,+X">Xavi Gonzalvo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ramalingam,+S">Srikumar Ramalingam</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Recent breakthroughs and 
successful deployment of large language and vision models in a constrained environment predominantly follow a two phase approach. First, large models are trained to achieve peak performance, followed by a model shrinking method to meet hardware constraints; Methods like distillation, compression or quantization help leverage the highly performant large models to induce smaller performant ones. Formally, this can be seen as the problem of identifying an optimal model of size $n$ from a larger model of size $k \cdot n$, where $k > 1$ is the overparameterization factor. This paper explores the hypothesis that a single training run can simultaneously train a larger model for performance and derive a smaller model for deployment. <br>Our contribution is an effective architectural change, namely, {\it Majority Kernels} that is compatible with the main standard architectures such as multi-layer perceptrons (MLPs), Residual networks (ResNets), and Transformers. We demonstrate that applying our technique can modify the training dynamics resulting in performance gains across architectures and tasks while maintaining the inference performance consistent. Furthermore, our approach adds minimal overhead to the cost incurred (wall clock time) at training time. The proposed approach shows strong performance on a wide variety of datasets and models, even outperforming strong baselines such as distilled ensembles as well as combinatorial optimization methods based on submodular optimization. 
</p> </div> </dd> <dt> <a name='item472'>[472]</a> <a href ="/abs/2402.05291" title="Abstract" id="2402.05291"> arXiv:2402.05291 </a> (replaced) [<a href="/pdf/2402.05291" title="Download PDF" id="pdf-2402.05291" aria-labelledby="pdf-2402.05291">pdf</a>, <a href="https://arxiv.org/html/2402.05291v2" title="View HTML" id="html-2402.05291" aria-labelledby="html-2402.05291" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.05291" title="Other formats" id="oth-2402.05291" aria-labelledby="oth-2402.05291">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Graph convolutional network as a fast statistical emulator for numerical ice sheet modeling </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Rahnemoonfar,+M">Maryam Rahnemoonfar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Koo,+Y">Younghyun Koo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to Journal of Glaciology on November 20, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computational Engineering, Finance, and Science (cs.CE) </div> <p class='mathjax'> The Ice-sheet and Sea-level System Model (ISSM) provides numerical solutions for ice sheet dynamics using finite element and fine mesh adaption. However, considering ISSM is compatible only with central processing units (CPUs), it has limitations in economizing computational time to explore the linkage between climate forcings and ice dynamics. Although several deep learning emulators using graphic processing units (GPUs) have been proposed to accelerate ice sheet modeling, most of them rely on convolutional neural networks (CNNs) designed for regular grids. 
Since they are not appropriate for the irregular meshes of ISSM, we use a graph convolutional network (GCN) to replicate the adapted mesh structures of the ISSM. When applied to transient simulations of the Pine Island Glacier (PIG), Antarctica, the GCN successfully reproduces ice thickness and velocity with a correlation coefficient of approximately 0.997, outperforming non-graph models, including fully convolutional network (FCN) and multi-layer perceptron (MLP). Compared to the fixed-resolution approach of the FCN, the flexible-resolution structure of the GCN accurately captures detailed ice dynamics in fast-ice regions. By leveraging 60-100 times faster computational time of the GPU-based GCN emulator, we efficiently examine the impacts of basal melting rates on the ice sheet dynamics in the PIG. </p> </div> </dd> <dt> <a name='item473'>[473]</a> <a href ="/abs/2402.12623" title="Abstract" id="2402.12623"> arXiv:2402.12623 </a> (replaced) [<a href="/pdf/2402.12623" title="Download PDF" id="pdf-2402.12623" aria-labelledby="pdf-2402.12623">pdf</a>, <a href="/format/2402.12623" title="Other formats" id="oth-2402.12623" aria-labelledby="oth-2402.12623">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Effective Edge Centrality via Neighborhood-based Optimization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+R">Renchi Yang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Social and Information Networks (cs.SI)</span> </div> <p class='mathjax'> Given a network G, edge centrality is a metric used to evaluate the importance of edges in G, which is a key concept in analyzing networks and finds vast applications involving edge ranking. 
In spite of a wealth of research on devising edge centrality measures, they incur either prohibitively high computation costs or varied deficiencies that lead to sub-optimal ranking quality. <br>To overcome their limitations, this paper proposes ECHO, a new centrality measure for edge ranking that is formulated based on neighborhood-based optimization objectives. We provide in-depth theoretical analyses to unveil the mathematical definitions and intuitive interpretations of the proposed ECHO measure from diverse aspects. Based thereon, we present three linear-complexity algorithms for ECHO estimation with non-trivial theoretical accuracy guarantees for centrality values. Extensive experiments comparing ECHO against six existing edge centrality metrics in graph analytics tasks on real networks showcase that ECHO offers superior practical effectiveness while offering high computation efficiency. </p> </div> </dd> <dt> <a name='item474'>[474]</a> <a href ="/abs/2402.14177" title="Abstract" id="2402.14177"> arXiv:2402.14177 </a> (replaced) [<a href="/pdf/2402.14177" title="Download PDF" id="pdf-2402.14177" aria-labelledby="pdf-2402.14177">pdf</a>, <a href="https://arxiv.org/html/2402.14177v3" title="View HTML" id="html-2402.14177" aria-labelledby="html-2402.14177" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.14177" title="Other formats" id="oth-2402.14177" aria-labelledby="oth-2402.14177">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Investigating Human Values in Online Communities </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Borenstein,+N">Nadav Borenstein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Arora,+A">Arnav Arora</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kaffee,+L">Lucie-Aimée Kaffee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Augenstein,+I">Isabelle 
Augenstein</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Social and Information Networks (cs.SI)</span>; Computers and Society (cs.CY) </div> <p class='mathjax'> Studying human values is instrumental for cross-cultural research, enabling a better understanding of preferences and behaviour of society at large and communities therein. To study the dynamics of communities online, we propose a method to computationally analyse values present on Reddit. Our method allows analysis at scale, complementing survey based approaches. We train a value relevance and a value polarity classifier, which we thoroughly evaluate using in-domain and out-of-domain human annotations. Using these, we automatically annotate over six million posts across 12k subreddits with Schwartz values. Our analysis unveils both previously recorded and novel insights into the values prevalent within various online communities. For instance, we discover a very negative stance towards conformity in the Vegan and AbolishTheMonarchy subreddits. Additionally, our study of geographically specific subreddits highlights the correlation between traditional values and conservative U.S. states. Through our work, we demonstrate how our dataset and method can be used as a complementary tool for qualitative study of online communication. 
</p> </div> </dd> <dt> <a name='item475'>[475]</a> <a href ="/abs/2402.15368" title="Abstract" id="2402.15368"> arXiv:2402.15368 </a> (replaced) [<a href="/pdf/2402.15368" title="Download PDF" id="pdf-2402.15368" aria-labelledby="pdf-2402.15368">pdf</a>, <a href="https://arxiv.org/html/2402.15368v4" title="View HTML" id="html-2402.15368" aria-labelledby="html-2402.15368" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.15368" title="Other formats" id="oth-2402.15368" aria-labelledby="oth-2402.15368">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Probabilistically Correct Language-based Multi-Robot Planning using Conformal Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+J">Jun Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+G">Guocheng He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kantaros,+Y">Yiannis Kantaros</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> This paper addresses task planning problems for language-instructed robot teams. Tasks are expressed in natural language (NL), requiring the robots to apply their capabilities at various locations and semantic objects. Several recent works have addressed similar planning problems by leveraging pre-trained Large Language Models (LLMs) to design effective multi-robot plans. However, these approaches lack performance guarantees. To address this challenge, we introduce a new distributed LLM-based planner, called S-ATLAS for Safe plAnning for Teams of Language-instructed AgentS, that is capable of achieving user-defined mission success rates. This is accomplished by leveraging conformal prediction (CP), a distribution-free uncertainty quantification tool in black-box models. 
CP allows the proposed multi-robot planner to reason about its inherent uncertainty in a distributed fashion, enabling robots to make individual decisions when they are sufficiently certain and seek help otherwise. We show, both theoretically and empirically, that the proposed planner can achieve user-specified task success rates, assuming successful plan execution, while minimizing the overall number of help requests. We provide comparative experiments against related works showing that our method is significantly more computationally efficient and achieves lower help rates. The advantage of our algorithm over baselines becomes more pronounced with increasing robot team size. </p> </div> </dd> <dt> <a name='item476'>[476]</a> <a href ="/abs/2402.17304" title="Abstract" id="2402.17304"> arXiv:2402.17304 </a> (replaced) [<a href="/pdf/2402.17304" title="Download PDF" id="pdf-2402.17304" aria-labelledby="pdf-2402.17304">pdf</a>, <a href="https://arxiv.org/html/2402.17304v3" title="View HTML" id="html-2402.17304" aria-labelledby="html-2402.17304" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.17304" title="Other formats" id="oth-2402.17304" aria-labelledby="oth-2402.17304">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Probing Multimodal Large Language Models for Global and Local Semantic Representations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tao,+M">Mingxu Tao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+Q">Quzhe Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+K">Kun Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+L">Liwei Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Feng,+Y">Yansong Feng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+D">Dongyan Zhao</a></div> <div class='list-comments 
mathjax'><span class='descriptor'>Comments:</span> Accepted by LREC-COLING 2024 as a short paper. ACL Anthology URL: [<a href="https://aclanthology.org/2024.lrec-main.1142/" rel="external noopener nofollow" class="link-external link-https">this https URL</a>] </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The advancement of Multimodal Large Language Models (MLLMs) has greatly accelerated the development of applications in understanding integrated texts and images. Recent works leverage image-caption datasets to train MLLMs, achieving state-of-the-art performance on image-to-text tasks. However, there are few studies exploring which layers of MLLMs make the most effort to the global image information, which plays vital roles in multimodal comprehension and generation. In this study, we find that the intermediate layers of models can encode more global semantic information, whose representation vectors perform better on visual-language entailment tasks, rather than the topmost layers. We further probe models regarding local semantic representations through object recognition tasks. We find that the topmost layers may excessively focus on local information, leading to a diminished ability to encode global information. Our code and data are released via <a href="https://github.com/kobayashikanna01/probing_MLLM_rep" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item477'>[477]</a> <a href ="/abs/2402.17497" title="Abstract" id="2402.17497"> arXiv:2402.17497 </a> (replaced) [<a href="/pdf/2402.17497" title="Download PDF" id="pdf-2402.17497" aria-labelledby="pdf-2402.17497">pdf</a>, <a href="https://arxiv.org/html/2402.17497v2" title="View HTML" id="html-2402.17497" aria-labelledby="html-2402.17497" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.17497" title="Other formats" id="oth-2402.17497" aria-labelledby="oth-2402.17497">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> REAR: A Relevance-Aware Retrieval-Augmented Framework for Open-Domain Question Answering </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yuhao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ren,+R">Ruiyang Ren</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+J">Junyi Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+W+X">Wayne Xin Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+J">Jing Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+J">Ji-Rong Wen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to EMNLP 2024 Main Conference. Published on ACL Anthology: <a href="https://aclanthology.org/2024.emnlp-main.321.pdf" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Information Retrieval (cs.IR) </div> <p class='mathjax'> Considering the limited internal parametric knowledge, retrieval-augmented generation (RAG) has been widely used to extend the knowledge scope of large language models (LLMs). 
Despite the extensive efforts on RAG research, in existing methods, LLMs cannot precisely assess the relevance of retrieved documents, thus likely leading to misleading or even incorrect utilization of external knowledge (e.g., retrieved documents). To address this issue, in this paper, we propose REAR, a RElevance-Aware Retrieval-augmented approach for open-domain question answering (QA). As the key motivation, we aim to enhance the self-awareness regarding the reliability of external knowledge for LLMs, so as to adaptively utilize external knowledge in RAG systems. Specifically, we develop a novel architecture for LLM-based RAG systems, by incorporating a specially designed assessment module that precisely assesses the relevance of retrieved documents. Furthermore, we propose an improved training method based on bi-granularity relevance fusion and noise-resistant training. By combining the improvements in both architecture and training, our proposed REAR can better utilize external knowledge by effectively perceiving the relevance of retrieved documents. Experiments on four open-domain QA tasks show that REAR significantly outperforms a number of previous competitive RAG approaches. Our codes can be accessed at <a href="https://github.com/RUCAIBox/REAR" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item478'>[478]</a> <a href ="/abs/2402.17805" title="Abstract" id="2402.17805"> arXiv:2402.17805 </a> (replaced) [<a href="/pdf/2402.17805" title="Download PDF" id="pdf-2402.17805" aria-labelledby="pdf-2402.17805">pdf</a>, <a href="https://arxiv.org/html/2402.17805v2" title="View HTML" id="html-2402.17805" aria-labelledby="html-2402.17805" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.17805" title="Other formats" id="oth-2402.17805" aria-labelledby="oth-2402.17805">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Graph Neural Networks and Arithmetic Circuits </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Barlag,+T">Timon Barlag</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Holzapfel,+V">Vivian Holzapfel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Strieker,+L">Laura Strieker</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Virtema,+J">Jonni Virtema</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vollmer,+H">Heribert Vollmer</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computational Complexity (cs.CC) </div> <p class='mathjax'> We characterize the computational power of neural networks that follow the graph neural network (GNN) architecture, not restricted to aggregate-combine GNNs or other particular types. We establish an exact correspondence between the expressivity of GNNs using diverse activation functions and arithmetic circuits over real numbers. In our results the activation function of the network becomes a gate type in the circuit. Our result holds for families of constant depth circuits and networks, both uniformly and non-uniformly, for all common activation functions. 
</p> </div> </dd> <dt> <a name='item479'>[479]</a> <a href ="/abs/2402.19160" title="Abstract" id="2402.19160"> arXiv:2402.19160 </a> (replaced) [<a href="/pdf/2402.19160" title="Download PDF" id="pdf-2402.19160" aria-labelledby="pdf-2402.19160">pdf</a>, <a href="https://arxiv.org/html/2402.19160v4" title="View HTML" id="html-2402.19160" aria-labelledby="html-2402.19160" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.19160" title="Other formats" id="oth-2402.19160" aria-labelledby="oth-2402.19160">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Effective Message Hiding with Order-Preserving Mechanisms </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+G">Gao Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xuchong,+Q">Qiu Xuchong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zihan,+Y">Ye Zihan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> BMVC 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Message hiding, a technique that conceals secret message bits within a cover image, aims to achieve an optimal balance among message capacity, recovery accuracy, and imperceptibility. While convolutional neural networks have notably improved message capacity and imperceptibility, achieving high recovery accuracy remains challenging. This challenge arises because convolutional operations struggle to preserve the sequential order of message bits and effectively address the discrepancy between these two modalities. To address this, we propose StegaFormer, an innovative MLP-based framework designed to preserve bit order and enable global fusion between modalities. 
Specifically, StegaFormer incorporates three crucial components: Order-Preserving Message Encoder (OPME), Decoder (OPMD) and Global Message-Image Fusion (GMIF). OPME and OPMD aim to preserve the order of message bits by segmenting the entire sequence into equal-length segments and incorporating sequential information during encoding and decoding. Meanwhile, GMIF employs a cross-modality fusion mechanism to effectively fuse the features from the two uncorrelated modalities. Experimental results on the COCO and DIV2K datasets demonstrate that StegaFormer surpasses existing state-of-the-art methods in terms of recovery accuracy, message capacity, and imperceptibility. We will make our code publicly available. </p> </div> </dd> <dt> <a name='item480'>[480]</a> <a href ="/abs/2403.03163" title="Abstract" id="2403.03163"> arXiv:2403.03163 </a> (replaced) [<a href="/pdf/2403.03163" title="Download PDF" id="pdf-2403.03163" aria-labelledby="pdf-2403.03163">pdf</a>, <a href="/format/2403.03163" title="Other formats" id="oth-2403.03163" aria-labelledby="oth-2403.03163">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Design2Code: Benchmarking Multimodal Code Generation for Automated Front-End Engineering </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Si,+C">Chenglei Si</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yanzhe Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+R">Ryan Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Z">Zhengyuan Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+R">Ruibo Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+D">Diyi Yang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> The first two authors contributed equally </div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Computer Vision and Pattern Recognition (cs.CV); Computers and Society (cs.CY) </div> <p class='mathjax'> Generative AI has made rapid advancements in recent years, achieving unprecedented capabilities in multimodal understanding and code generation. This can enable a new paradigm of front-end development in which multimodal large language models (MLLMs) directly convert visual designs into code implementations. In this work, we construct Design2Code - the first real-world benchmark for this task. Specifically, we manually curate 484 diverse real-world webpages as test cases and develop a set of automatic evaluation metrics to assess how well current multimodal LLMs can generate the code implementations that directly render into the given reference webpages, given the screenshots as input. We also complement automatic metrics with comprehensive human evaluations to validate the performance ranking. To rigorously benchmark MLLMs, we test various multimodal prompting methods on frontier models such as GPT-4o, GPT-4V, Gemini, and Claude. Our fine-grained break-down metrics indicate that models mostly lag in recalling visual elements from the input webpages and generating correct layout designs. </p> </div> </dd> <dt> <a name='item481'>[481]</a> <a href ="/abs/2403.05968" title="Abstract" id="2403.05968"> arXiv:2403.05968 </a> (replaced) [<a href="/pdf/2403.05968" title="Download PDF" id="pdf-2403.05968" aria-labelledby="pdf-2403.05968">pdf</a>, <a href="/format/2403.05968" title="Other formats" id="oth-2403.05968" aria-labelledby="oth-2403.05968">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> IMU as an Input vs. 
a Measurement of the State in Inertial-Aided State Estimation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Burnett,+K">Keenan Burnett</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schoellig,+A+P">Angela P. Schoellig</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barfoot,+T+D">Timothy D. Barfoot</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to Robotica November 19th, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Treating IMU measurements as inputs to a motion model and then preintegrating these measurements has almost become a de-facto standard in many robotics applications. However, this approach has a few shortcomings. First, it conflates the IMU measurement noise with the underlying process noise. Second, it is unclear how the state will be propagated in the case of IMU measurement dropout. Third, it does not lend itself well to dealing with multiple high-rate sensors such as a lidar and an IMU or multiple asynchronous IMUs. In this paper, we compare treating an IMU as an input to a motion model against treating it as a measurement of the state in a continuous-time state estimation framework. We methodically compare the performance of these two approaches on a 1D simulation and show that they perform identically, assuming that each method's hyperparameters have been tuned on a training set. We also provide results for our continuous-time lidar-inertial odometry in simulation and on the Newer College Dataset. In simulation, our approach exceeds the performance of an imu-as-input baseline during highly aggressive motion. On the Newer College Dataset, we demonstrate state of the art results. 
These results show that continuous-time techniques and the treatment of the IMU as a measurement of the state are promising areas of further research. Code for our lidar-inertial odometry can be found at: <a href="https://github.com/utiasASRL/steam_icp" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item482'>[482]</a> <a href ="/abs/2403.07820" title="Abstract" id="2403.07820"> arXiv:2403.07820 </a> (replaced) [<a href="/pdf/2403.07820" title="Download PDF" id="pdf-2403.07820" aria-labelledby="pdf-2403.07820">pdf</a>, <a href="https://arxiv.org/html/2403.07820v3" title="View HTML" id="html-2403.07820" aria-labelledby="html-2403.07820" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.07820" title="Other formats" id="oth-2403.07820" aria-labelledby="oth-2403.07820">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The Variant of Designated Verifier Signature Scheme with Message Recovery </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+H">Hong-Sheng Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+Y">Yu-Lei Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+H">Han-Yu Lin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span> </div> <p class='mathjax'> In this work, we introduce a strong Designated Verifier Signature (DVS) scheme that incorporates a message recovery mechanism inspired by the concept of the Universal Designated Verifier Signature (UDVS) scheme. 
It is worth noting that Saeednia's strong designated verifier signature scheme fails to guarantee the privacy of the signature, making it unsuitable for certain applications such as medical record certificates or voting systems. To overcome this limitation, we extend Lee's strong designated verifier signature with a message recovery scheme to develop a universal designated verifier signature scheme. This universal designated verifier scheme is crafted to safeguard the privacy of signature holders, ensuring that only designated verifiers can authenticate the true signer and recover the messages. </p> </div> </dd> <dt> <a name='item483'>[483]</a> <a href ="/abs/2403.09055" title="Abstract" id="2403.09055"> arXiv:2403.09055 </a> (replaced) [<a href="/pdf/2403.09055" title="Download PDF" id="pdf-2403.09055" aria-labelledby="pdf-2403.09055">pdf</a>, <a href="https://arxiv.org/html/2403.09055v3" title="View HTML" id="html-2403.09055" aria-labelledby="html-2403.09055" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.09055" title="Other formats" id="oth-2403.09055" aria-labelledby="oth-2403.09055">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SemanticDraw: Towards Real-Time Interactive Content Creation from Image Diffusion Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+J">Jaerin Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jung,+D+S">Daniel Sungho Jung</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+K">Kanggeon Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+K+M">Kyoung Mu Lee</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 20 pages, 15 figures. 
v3: added tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> We introduce SemanticDraw, a new paradigm of interactive content creation where high-quality images are generated in near real-time from given multiple hand-drawn regions, each encoding prescribed semantic meaning. In order to maximize the productivity of content creators and to fully realize their artistic imagination, it requires both quick interactive interfaces and fine-grained regional controls in their tools. Despite astonishing generation quality from recent diffusion models, we find that existing approaches for regional controllability are very slow (52 seconds for $512 \times 512$ image) while not compatible with acceleration methods such as LCM, blocking their huge potential in interactive content creation. From this observation, we build our solution for interactive content creation in two steps: (1) we establish compatibility between region-based controls and acceleration techniques for diffusion models, maintaining high fidelity of multi-prompt image generation with $\times 10$ reduced number of inference steps, (2) we increase the generation throughput with our new multi-prompt stream batch pipeline, enabling low-latency generation from multiple, region-based text prompts on a single RTX 2080 Ti GPU. Our proposed framework is generalizable to any existing diffusion models and acceleration schedulers, allowing sub-second (0.64 seconds) image content creation application upon well-established image diffusion models. Our project page is: <a href="https://jaerinlee.com/research/semantic-draw" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item484'>[484]</a> <a href ="/abs/2403.11585" title="Abstract" id="2403.11585"> arXiv:2403.11585 </a> (replaced) [<a href="/pdf/2403.11585" title="Download PDF" id="pdf-2403.11585" aria-labelledby="pdf-2403.11585">pdf</a>, <a href="https://arxiv.org/html/2403.11585v3" title="View HTML" id="html-2403.11585" aria-labelledby="html-2403.11585" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.11585" title="Other formats" id="oth-2403.11585" aria-labelledby="oth-2403.11585">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Linguacodus: A Synergistic Framework for Transformative Code Generation in Machine Learning Pipelines </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Trofimova,+E">Ekaterina Trofimova</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sataev,+E">Emil Sataev</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ustyuzhanin,+A+E">Andrey E. Ustyuzhanin</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Programming Languages (cs.PL); Software Engineering (cs.SE) </div> <p class='mathjax'> In the ever-evolving landscape of machine learning, seamless translation of natural language descriptions into executable code remains a formidable challenge. This paper introduces Linguacodus, an innovative framework designed to tackle this challenge by deploying a dynamic pipeline that iteratively transforms natural language task descriptions into code through high-level data-shaping instructions. The core of Linguacodus is a fine-tuned large language model (LLM), empowered to evaluate diverse solutions for various problems and select the most fitting one for a given task. 
This paper details the fine-tuning process, and sheds light on how natural language descriptions can be translated into functional code. Linguacodus represents a substantial leap towards automated code generation, effectively bridging the gap between task descriptions and executable code. It holds great promise for advancing machine learning applications across diverse domains. Additionally, we propose an algorithm capable of transforming a natural description of an ML task into code with minimal human interaction. In extensive experiments on a vast machine learning code dataset originating from Kaggle, we showcase the effectiveness of Linguacodus. The investigations highlight its potential applications across diverse domains, emphasizing its impact on applied machine learning in various scientific fields. </p> </div> </dd> <dt> <a name='item485'>[485]</a> <a href ="/abs/2403.12116" title="Abstract" id="2403.12116"> arXiv:2403.12116 </a> (replaced) [<a href="/pdf/2403.12116" title="Download PDF" id="pdf-2403.12116" aria-labelledby="pdf-2403.12116">pdf</a>, <a href="https://arxiv.org/html/2403.12116v4" title="View HTML" id="html-2403.12116" aria-labelledby="html-2403.12116" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.12116" title="Other formats" id="oth-2403.12116" aria-labelledby="oth-2403.12116">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unsupervised End-to-End Training with a Self-Defined Target </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+D">Dongshu Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Laydevant,+J">Jérémie Laydevant</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pontlevy,+A">Adrien Pontlevy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Querlioz,+D">Damien Querlioz</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Grollier,+J">Julie Grollier</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Neuromorph. Comput. Eng. 4 (2024) 044005 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Neural and Evolutionary Computing (cs.NE)</span>; Emerging Technologies (cs.ET); Machine Learning (cs.LG) </div> <p class='mathjax'> Designing algorithms for versatile AI hardware that can learn on the edge using both labeled and unlabeled data is challenging. Deep end-to-end training methods incorporating phases of self-supervised and supervised learning are accurate and adaptable to input data but self-supervised learning requires even more computational and memory resources than supervised learning, too high for current embedded hardware. Conversely, unsupervised layer-by-layer training, such as Hebbian learning, is more compatible with existing hardware but does not integrate well with supervised learning. To address this, we propose a method enabling networks or hardware designed for end-to-end supervised learning to also perform high-performance unsupervised learning by adding two simple elements to the output layer: Winner-Take-All (WTA) selectivity and homeostasis regularization. These mechanisms introduce a "self-defined target" for unlabeled data, allowing purely unsupervised training for both fully-connected and convolutional layers using backpropagation or equilibrium propagation on datasets like MNIST (up to 99.2%), Fashion-MNIST (up to 90.3%), and SVHN (up to 81.5%). We extend this method to semi-supervised learning, adjusting targets based on data type, achieving 96.6% accuracy with only 600 labeled MNIST samples in a multi-layer perceptron. 
Our results show that this approach can effectively enable networks and hardware initially dedicated to supervised learning to also perform unsupervised learning, adapting to varying availability of labeled data. </p> </div> </dd> <dt> <a name='item486'>[486]</a> <a href ="/abs/2403.12324" title="Abstract" id="2403.12324"> arXiv:2403.12324 </a> (replaced) [<a href="/pdf/2403.12324" title="Download PDF" id="pdf-2403.12324" aria-labelledby="pdf-2403.12324">pdf</a>, <a href="https://arxiv.org/html/2403.12324v5" title="View HTML" id="html-2403.12324" aria-labelledby="html-2403.12324" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.12324" title="Other formats" id="oth-2403.12324" aria-labelledby="oth-2403.12324">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards a Theory of Pragmatic Information </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Weinberger,+E+D">Edward D. Weinberger</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 15 pages, no figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span> </div> <p class='mathjax'> Standard information theory says nothing about how much meaning is conveyed by a message. We fill this gap with a rigorously justifiable, quantitative definition of ``pragmatic information'', the amount of meaning in a message relevant to a particular decision. We posit that such a message updates a random variable, $\omega$, that informs the decision. The pragmatic information of a single message is then defined as the Kullback-Leibler divergence between the a priori and a posteriori probabilities of $\omega$; the pragmatic information of a message ensemble is the expected value of the pragmatic information of the ensemble's component messages. 
We justify these definitions by proving that the pragmatic information of a single message is the expected difference between the shortest binary encoding of $\omega$ under the a priori and a posteriori distributions, and that the average of the pragmatic values of individual messages, when sampled a large number of times from the ensemble, approaches its expected value. <br>Pragmatic information is non-negative and additive for independent decisions and ``pragmatically independent'' messages. Also, pragmatic information is the information analogue of free energy: just as free energy quantifies the part of a system's total energy available to do useful work, so pragmatic information quantifies the information actually used in making a decision. <br>We sketch 3 applications: the single play of a slot machine, a.k.a. a ``one armed bandit'', with an unknown payout probability; a characterization of the rate of biological evolution in the so-called ``quasi-species'' model; and a reformulation of the efficient market hypothesis of finance. We note the importance of the computational capacity of the receiver in each case. 
</p> </div> </dd> <dt> <a name='item487'>[487]</a> <a href ="/abs/2403.14320" title="Abstract" id="2403.14320"> arXiv:2403.14320 </a> (replaced) [<a href="/pdf/2403.14320" title="Download PDF" id="pdf-2403.14320" aria-labelledby="pdf-2403.14320">pdf</a>, <a href="https://arxiv.org/html/2403.14320v2" title="View HTML" id="html-2403.14320" aria-labelledby="html-2403.14320" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.14320" title="Other formats" id="oth-2403.14320" aria-labelledby="oth-2403.14320">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Exosense: A Vision-Based Scene Understanding System For Exoskeletons </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+J">Jianeng Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mattamala,+M">Matias Mattamala</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kassab,+C">Christina Kassab</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Burger,+G">Guillaume Burger</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Elnecave,+F">Fabio Elnecave</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+L">Lintong Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Petriaux,+M">Marine Petriaux</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fallon,+M">Maurice Fallon</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 9 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Self-balancing exoskeletons are a key enabling technology for individuals with mobility impairments. 
While the current challenges focus on human-compliant hardware and control, unlocking their use for daily activities requires a scene perception system. In this work, we present Exosense, a vision-centric scene understanding system for self-balancing exoskeletons. We introduce a multi-sensor visual-inertial mapping device as well as a navigation stack for state estimation, terrain mapping and long-term operation. We tested Exosense attached to both a human leg and Wandercraft's Personal Exoskeleton in real-world indoor scenarios. This enabled us to test the system during typical periodic walking gaits, as well as future uses in multi-story environments. We demonstrate that Exosense can achieve an odometry drift of about 4 cm per meter traveled, and construct terrain maps under 1 cm average reconstruction error. It can also work in a visual localization mode in a previously mapped environment, providing a step towards long-term operation of exoskeletons. </p> </div> </dd> <dt> <a name='item488'>[488]</a> <a href ="/abs/2403.14931" title="Abstract" id="2403.14931"> arXiv:2403.14931 </a> (replaced) [<a href="/pdf/2403.14931" title="Download PDF" id="pdf-2403.14931" aria-labelledby="pdf-2403.14931">pdf</a>, <a href="https://arxiv.org/html/2403.14931v2" title="View HTML" id="html-2403.14931" aria-labelledby="html-2403.14931" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.14931" title="Other formats" id="oth-2403.14931" aria-labelledby="oth-2403.14931">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Structured stability analysis of networked systems with uncertain links </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Mariano,+S">Simone Mariano</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Cantoni,+M">Michael Cantoni</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span 
class="primary-subject">Systems and Control (eess.SY)</span>; Dynamical Systems (math.DS) </div> <p class='mathjax'> An input-output approach to stability analysis is explored for networked systems with uncertain link dynamics. The main result consists of a collection of integral quadratic constraints, which together imply robust stability of the uncertain networked system, under the assumption that stability is achieved with ideal links. The conditions are decentralized inasmuch as each involves only agent and uncertainty model parameters that are local to a corresponding link. This makes the main result, which imposes no restriction on network structure, suitable for the study of large-scale systems. </p> </div> </dd> <dt> <a name='item489'>[489]</a> <a href ="/abs/2403.19797" title="Abstract" id="2403.19797"> arXiv:2403.19797 </a> (replaced) [<a href="/pdf/2403.19797" title="Download PDF" id="pdf-2403.19797" aria-labelledby="pdf-2403.19797">pdf</a>, <a href="https://arxiv.org/html/2403.19797v4" title="View HTML" id="html-2403.19797" aria-labelledby="html-2403.19797" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.19797" title="Other formats" id="oth-2403.19797" aria-labelledby="oth-2403.19797">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Efficient 3D Instance Mapping and Localization with Neural Fields </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+G">George Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jatavallabhula,+K+M">Krishna Murthy Jatavallabhula</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Torralba,+A">Antonio Torralba</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> We tackle the problem of learning an implicit scene 
representation for 3D instance segmentation from a sequence of posed RGB images. Towards this, we introduce 3DIML, a novel framework that efficiently learns a neural label field which can render 3D instance segmentation masks from novel viewpoints. Opposed to prior art that optimizes a neural field in a self-supervised manner, requiring complicated training procedures and loss function design, 3DIML leverages a two-phase process. The first phase, InstanceMap, takes as input 2D segmentation masks of the image sequence generated by a frontend instance segmentation model, and associates corresponding masks across images to 3D labels. These almost 3D-consistent pseudolabel masks are then used in the second phase, InstanceLift, to supervise the training of a neural label field, which interpolates regions missed by InstanceMap and resolves ambiguities. Additionally, we introduce InstanceLoc, which enables near realtime localization of instance masks given a trained neural label field. We evaluate 3DIML on sequences from the Replica and ScanNet datasets and demonstrate its effectiveness under mild assumptions for the image sequences. We achieve a large practical speedup over existing implicit scene representation methods with comparable quality, showcasing its potential to facilitate faster and more effective 3D scene understanding. 
</p> </div> </dd> <dt> <a name='item490'>[490]</a> <a href ="/abs/2403.19912" title="Abstract" id="2403.19912"> arXiv:2403.19912 </a> (replaced) [<a href="/pdf/2403.19912" title="Download PDF" id="pdf-2403.19912" aria-labelledby="pdf-2403.19912">pdf</a>, <a href="https://arxiv.org/html/2403.19912v2" title="View HTML" id="html-2403.19912" aria-labelledby="html-2403.19912" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.19912" title="Other formats" id="oth-2403.19912" aria-labelledby="oth-2403.19912">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Automated Identification and Segmentation of Hi Sources in CRAFTS Using Deep Learning Method </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+Z">Zihao Song</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+H">Huaxi Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Quan,+D">Donghui Quan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+D">Di Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+Y">Yinghui Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ni,+S">Shulei Ni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yunchuan Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+Y">Yun Zheng</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Astrophysics of Galaxies (astro-ph.GA); Instrumentation and Methods for Astrophysics (astro-ph.IM) </div> <p class='mathjax'> Identifying neutral hydrogen (\hi) galaxies from observational data is a significant challenge in \hi\ galaxy surveys. 
With the advancement of observational technology, especially with the advent of large-scale telescope projects such as FAST and SKA, the significant increase in data volume presents new challenges for the efficiency and accuracy of data processing. To address this challenge, in this study, we present a machine learning-based method for extracting \hi\ sources from the three-dimensional (3D) spectral data obtained from the Commensal Radio Astronomy FAST Survey (CRAFTS). We have carefully assembled a specialized dataset, HISF, rich in \hi\ sources, specifically designed to enhance the detection process. Our model, Unet-LK, utilizes the advanced 3D-Unet segmentation architecture and employs an elongated convolution kernel to effectively capture the intricate structures of \hi\ sources. This strategy ensures a reliable identification and segmentation of \hi\ sources, achieving notable performance metrics with a recall rate of 91.6\% and an accuracy of 95.7\%. These results substantiate the robustness of our dataset and the effectiveness of our proposed network architecture in the precise identification of \hi\ sources. Our code and dataset are publicly available at <a href="https://github.com/fishszh/HISF" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item491'>[491]</a> <a href ="/abs/2404.02702" title="Abstract" id="2404.02702"> arXiv:2404.02702 </a> (replaced) [<a href="/pdf/2404.02702" title="Download PDF" id="pdf-2404.02702" aria-labelledby="pdf-2404.02702">pdf</a>, <a href="https://arxiv.org/html/2404.02702v3" title="View HTML" id="html-2404.02702" aria-labelledby="html-2404.02702" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.02702" title="Other formats" id="oth-2404.02702" aria-labelledby="oth-2404.02702">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PSCodec: A Series of High-Fidelity Low-bitrate Neural Speech Codecs Leveraging Prompt Encoders </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+Y">Yu Pan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+X">Xiang Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Y">Yuguang Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yao,+J">Jixun Yao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+Y">Yanni Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ye,+J">Jianhao Ye</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+H">Hongbin Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+L">Lei Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+J">Jianjun Zhao</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to TASLP </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Neural speech codecs have recently emerged as a focal point in the fields of speech compression and generation. 
Despite this progress, achieving high-quality speech reconstruction under low-bitrate scenarios remains a significant challenge. In this paper, we propose PSCodec, a series of neural speech codecs based on prompt encoders, comprising PSCodec-Base, PSCodec-DRL-ICT, and PSCodec-CasAN, which are capable of delivering high-performance speech reconstruction with low bandwidths. Specifically, we first introduce PSCodec-Base, which leverages a pretrained speaker verification model-based prompt encoder (VPP-Enc) and a learnable Mel-spectrogram-based prompt encoder (MelP-Enc) to effectively disentangle and integrate voiceprint and Mel-related features in utterances. To further enhance feature utilization efficiency, we propose PSCodec-DRL-ICT, incorporating a structural similarity (SSIM) based disentangled representation loss (DRL) and an incremental continuous training (ICT) strategy. While PSCodec-DRL-ICT demonstrates impressive performance, its reliance on extensive hyperparameter tuning and multi-stage training makes it somewhat labor-intensive. To circumvent these limitations, we propose PSCodec-CasAN, utilizing an advanced cascaded attention network (CasAN) to enhance representational capacity of the entire system. Extensive experiments show that our proposed PSCodec-Base, PSCodec-DRL-ICT, and PSCodec-CasAN all significantly outperform several state-of-the-art neural codecs, exhibiting substantial improvements in both speech reconstruction quality and speaker similarity under low-bitrate conditions. 
</p> </div> </dd> <dt> <a name='item492'>[492]</a> <a href ="/abs/2404.04254" title="Abstract" id="2404.04254"> arXiv:2404.04254 </a> (replaced) [<a href="/pdf/2404.04254" title="Download PDF" id="pdf-2404.04254" aria-labelledby="pdf-2404.04254">pdf</a>, <a href="https://arxiv.org/html/2404.04254v3" title="View HTML" id="html-2404.04254" aria-labelledby="html-2404.04254" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.04254" title="Other formats" id="oth-2404.04254" aria-labelledby="oth-2404.04254">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Watermark-based Attribution of AI-Generated Content </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+Z">Zhengyuan Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+M">Moyang Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+Y">Yuepeng Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gong,+N+Z">Neil Zhenqiang Gong</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Several companies have deployed watermark-based detection to identify AI-generated content. However, attribution--the ability to trace back to the user of a generative AI (GenAI) service who created a given piece of AI-generated content--remains largely unexplored despite its growing importance. In this work, we aim to bridge this gap by conducting the first systematic study on watermark-based, user-level attribution of AI-generated content. 
Our key idea is to assign a unique watermark to each user of the GenAI service and embed this watermark into the AI-generated content created by that user. Attribution is then performed by identifying the user whose watermark best matches the one extracted from the given content. This approach, however, faces a key challenge: How should watermarks be selected for users to maximize attribution performance? To address the challenge, we first theoretically derive lower bounds on detection and attribution performance through rigorous probabilistic analysis for any given set of user watermarks. Then, we select watermarks for users to maximize these lower bounds, thereby optimizing detection and attribution performance. Our theoretical and empirical results show that watermark-based attribution inherits both the accuracy and (non-)robustness properties of the underlying watermark. Specifically, attribution remains highly accurate when the watermarked AI-generated content is either not post-processed or subjected to common post-processing such as JPEG compression, as well as black-box adversarial post-processing with limited query budgets. 
</p> </div> </dd> <dt> <a name='item493'>[493]</a> <a href ="/abs/2404.04856" title="Abstract" id="2404.04856"> arXiv:2404.04856 </a> (replaced) [<a href="/pdf/2404.04856" title="Download PDF" id="pdf-2404.04856" aria-labelledby="pdf-2404.04856">pdf</a>, <a href="https://arxiv.org/html/2404.04856v2" title="View HTML" id="html-2404.04856" aria-labelledby="html-2404.04856" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.04856" title="Other formats" id="oth-2404.04856" aria-labelledby="oth-2404.04856">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Msmsfnet: a multi-stream and multi-scale fusion net for edge detection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+C">Chenguang Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+C">Chisheng Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dong,+F">Feifei Dong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiao,+X">Xiayang Xiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Su,+X">Xin Su</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+C">Chuanhua Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+D">Dejin Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Q">Qingquan Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Edge detection is a long-standing problem in computer vision. Recent deep learning based algorithms achieve state-of-the-art performance in publicly available datasets. Despite their efficiency, their performance, however, relies heavily on the pre-trained weights of the backbone network on the ImageNet dataset. 
This significantly limits the design space of deep learning based edge detectors. Whenever we want to devise a new model, we have to train this new model on the ImageNet dataset first, and then fine-tune the model using the edge detection datasets. The comparison would be unfair otherwise. However, it is usually not feasible for many researchers to train a model on the ImageNet dataset due to the limited computation resources. Besides, if these methods need to be trained to detect edges in a different kind of data, Synthetic Aperture Radar (SAR) images for instance, the pre-trained weights on the ImageNet dataset are unlikely to improve the edge detection accuracy due to the strong differences in the statistics between optical and SAR images. In the meantime, no dataset for SAR image processing matches the size of the ImageNet dataset. In this work, we study the performance achievable by existing methods in publicly available datasets when they are trained from scratch, and devise a new network architecture, the multi-stream and multi-scale fusion net (msmsfnet), for edge detection. We show in our experiments that by training all models from scratch to ensure the fairness of comparison, our model outperforms state-of-the-art deep learning based edge detectors in three publicly available datasets. The efficiency of our model is further demonstrated by the experiments for edge detection in SAR images, which serves as an important evidence showing the meaningfulness of this work as no useful pre-trained weight is available for edge detection in SAR images. 
</p> </div> </dd> <dt> <a name='item494'>[494]</a> <a href ="/abs/2404.08217" title="Abstract" id="2404.08217"> arXiv:2404.08217 </a> (replaced) [<a href="/pdf/2404.08217" title="Download PDF" id="pdf-2404.08217" aria-labelledby="pdf-2404.08217">pdf</a>, <a href="/format/2404.08217" title="Other formats" id="oth-2404.08217" aria-labelledby="oth-2404.08217">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Escape with Your Self: A Solution to the Avoidance Problem with Decidable Bidirectional Typing for Reachability Types </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jia,+S">Songlin Jia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+G">Guannan Wei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+S">Siyuan He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bao,+Y">Yuyan Bao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rompf,+T">Tiark Rompf</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Programming Languages (cs.PL)</span> </div> <p class='mathjax'> Despite Rust's success in system programming, its ``shared XOR mutable'' principle significantly restricts how mutable values can be used, precluding many useful functional programming idioms. Reachability types are a recent proposal to address the key limitations of Rust-style approaches by tracking, rather than prohibiting, shared, escaping, and mutable data, even in the presence of higher-order functions and polymorphic types. The key to enabling tracking in the presence of avoidance is their notion of self-references. Similar to this pointers in OO languages, self-references expose the reachability of enclosing objects to internal components. 
While they help track escaped data, they present major challenges in designing expressive subtyping and decidable typing algorithms, as they involve subtle interactions with bounds and variance. This lack of an effective type checking algorithm is a key impediment toward making reachability types truly practical and leveraging them to bring the benefits of programming with lifetimes to practical higher-level languages. <br>In this paper, we investigate the issues of subtyping and type checking of self-references, to fully enable this avoidance solution. We address key gaps in previous work by proposing a refined notion of subtyping, which supports encoding datatypes without resorting to term-level coercions, making the overall system more expressive. We also develop a sound and decidable bidirectional typing algorithm, formally verified in Coq. </p> </div> </dd> <dt> <a name='item495'>[495]</a> <a href ="/abs/2404.11121" title="Abstract" id="2404.11121"> arXiv:2404.11121 </a> (replaced) [<a href="/pdf/2404.11121" title="Download PDF" id="pdf-2404.11121" aria-labelledby="pdf-2404.11121">pdf</a>, <a href="https://arxiv.org/html/2404.11121v2" title="View HTML" id="html-2404.11121" aria-labelledby="html-2404.11121" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.11121" title="Other formats" id="oth-2404.11121" aria-labelledby="oth-2404.11121">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> TransLinkGuard: Safeguarding Transformer Models Against Model Stealing in Edge Deployment </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Q">Qinfeng Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shen,+Z">Zhiqiang Shen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qin,+Z">Zhenghan Qin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+Y">Yangfan Xie</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+X">Xuhong Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Du,+T">Tianyu Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+J">Jianwei Yin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by ACM MM24 Conference </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Proprietary large language models (LLMs) have been widely applied in various scenarios. Additionally, deploying LLMs on edge devices is trending for efficiency and privacy reasons. However, edge deployment of proprietary LLMs introduces new security challenges: edge-deployed models are exposed as white-box accessible to users, enabling adversaries to conduct effective model stealing (MS) attacks. Unfortunately, existing defense mechanisms fail to provide effective protection. Specifically, we identify four critical protection properties that existing methods fail to simultaneously satisfy: (1) maintaining protection after a model is physically copied; (2) authorizing model access at request level; (3) safeguarding runtime reverse engineering; (4) achieving high security with negligible runtime overhead. To address the above issues, we propose TransLinkGuard, a plug-and-play model protection approach against model stealing on edge devices. The core part of TransLinkGuard is a lightweight authorization module residing in a secure environment, e.g., TEE. The authorization module can freshly authorize each request based on its input. Extensive experiments show that TransLinkGuard achieves the same security protection as the black-box security guarantees with negligible overhead. 
</p> </div> </dd> <dt> <a name='item496'>[496]</a> <a href ="/abs/2404.11977" title="Abstract" id="2404.11977"> arXiv:2404.11977 </a> (replaced) [<a href="/pdf/2404.11977" title="Download PDF" id="pdf-2404.11977" aria-labelledby="pdf-2404.11977">pdf</a>, <a href="https://arxiv.org/html/2404.11977v4" title="View HTML" id="html-2404.11977" aria-labelledby="html-2404.11977" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.11977" title="Other formats" id="oth-2404.11977" aria-labelledby="oth-2404.11977">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Mens Sana In Corpore Sano: Sound Firmware Corpora for Vulnerability Research </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Helmke,+R">René Helmke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Padilla,+E">Elmar Padilla</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Aschenbruck,+N">Nils Aschenbruck</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted Manuscript for the 2025 Network and Distributed System Security Symposium (NDSS'25) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Digital Libraries (cs.DL) </div> <p class='mathjax'> Firmware corpora for vulnerability research should be scientifically sound. Yet, several practical challenges complicate the creation of sound corpora: Sample acquisition, e.g., is hard and one must overcome the barrier of proprietary or encrypted data. As image contents are unknown prior to analysis, it is hard to select high-quality samples that can satisfy scientific demands. Ideally, we help each other out by sharing data. But here, sharing is problematic due to copyright laws. 
Instead, papers must carefully document each step of corpus creation: If a step is unclear, replicability is jeopardized. This has cascading effects on result verifiability, representativeness, and, thus, soundness. <br>Despite all challenges, how can we maintain the soundness of firmware corpora? This paper thoroughly analyzes the problem space and investigates its impact on research: We distill practical binary analysis challenges that significantly influence corpus creation. We use these insights to derive guidelines that help researchers to nurture corpus replicability and representativeness. We apply them to 44 top tier papers and systematically analyze scientific corpus creation practices. Our comprehensive analysis confirms that there is currently no common ground in related work. It shows the added value of our guidelines, as they discover methodical issues in corpus creation and unveil miniscule step stones in documentation. These blur visions on representativeness, hinder replicability, and, thus, negatively impact the soundness of otherwise excellent work. <br>Finally, we show the feasibility of our guidelines and build a new, replicable corpus for large-scale analyses on Linux firmware: LFwC. We share rich meta data for good (and proven) replicability. We verify unpacking, deduplicate, identify contents, provide ground truth, and show LFwC's utility for research. 
</p> </div> </dd> <dt> <a name='item497'>[497]</a> <a href ="/abs/2404.16583" title="Abstract" id="2404.16583"> arXiv:2404.16583 </a> (replaced) [<a href="/pdf/2404.16583" title="Download PDF" id="pdf-2404.16583" aria-labelledby="pdf-2404.16583">pdf</a>, <a href="https://arxiv.org/html/2404.16583v3" title="View HTML" id="html-2404.16583" aria-labelledby="html-2404.16583" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.16583" title="Other formats" id="oth-2404.16583" aria-labelledby="oth-2404.16583">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Fast Machine-Precision Spectral Likelihoods for Stationary Time Series </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Geoga,+C+J">Christopher J. Geoga</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span>; Computation (stat.CO); Methodology (stat.ME) </div> <p class='mathjax'> We provide in this work an algorithm for approximating a very broad class of symmetric Toeplitz matrices to machine precision in $\mathcal{O}(n \log n)$ time with applications to fitting time series models. In particular, for a symmetric Toeplitz matrix $\mathbf{\Sigma}$ with values $\mathbf{\Sigma}_{j,k} = h_{|j-k|} = \int_{-1/2}^{1/2} e^{2 \pi i |j-k| \omega} S(\omega) \mathrm{d} \omega$ where $S(\omega)$ is piecewise smooth, we give an approximation $\mathbf{\mathcal{F}} \mathbf{\Sigma} \mathbf{\mathcal{F}}^H \approx \mathbf{D} + \mathbf{U} \mathbf{V}^H$, where $\mathbf{\mathcal{F}}$ is the DFT matrix, $\mathbf{D}$ is diagonal, and the matrices $\mathbf{U}$ and $\mathbf{V}$ are in $\mathbb{C}^{n \times r}$ with $r \ll n$. Studying these matrices in the context of time series, we offer a theoretical explanation of this structure and connect it to existing spectral-domain approximation frameworks. 
We then give a complete discussion of the numerical method for assembling the approximation and demonstrate its efficiency for improving Whittle-type likelihood approximations, including dramatic examples where a correction of rank $r = 2$ to the standard Whittle approximation increases the accuracy of the log-likelihood approximation from $3$ to $14$ digits for a matrix $\mathbf{\Sigma} \in \mathbb{R}^{10^5 \times 10^5}$. The method and analysis of this work applies well beyond time series analysis, providing an algorithm for extremely accurate solutions to linear systems with a wide variety of symmetric Toeplitz matrices whose entries are generated by a piecewise smooth $S(\omega)$. The analysis employed here largely depends on asymptotic expansions of oscillatory integrals, and also provides a new perspective on when existing spectral-domain approximation methods for Gaussian log-likelihoods can be particularly problematic. </p> </div> </dd> <dt> <a name='item498'>[498]</a> <a href ="/abs/2404.17687" title="Abstract" id="2404.17687"> arXiv:2404.17687 </a> (replaced) [<a href="/pdf/2404.17687" title="Download PDF" id="pdf-2404.17687" aria-labelledby="pdf-2404.17687">pdf</a>, <a href="https://arxiv.org/html/2404.17687v2" title="View HTML" id="html-2404.17687" aria-labelledby="html-2404.17687" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.17687" title="Other formats" id="oth-2404.17687" aria-labelledby="oth-2404.17687">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Knowledge Transfer for Cross-Domain Reinforcement Learning: A Systematic Review </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Serrano,+S+A">Sergio A. Serrano</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Martinez-Carranza,+J">Jose Martinez-Carranza</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sucar,+L+E">L. 
Enrique Sucar</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> IEEE Access, Volume 12, 2024, Pages 114552-114572 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Robotics (cs.RO) </div> <p class='mathjax'> Reinforcement Learning (RL) provides a framework in which agents can be trained, via trial and error, to solve complex decision-making problems. Learning with little supervision causes RL methods to require large amounts of data, rendering them too expensive for many applications (e.g., robotics). By reusing knowledge from a different task, knowledge transfer methods present an alternative to reduce the training time in RL. Given the severe data scarcity, due to their flexibility, there has been a growing interest in methods capable of transferring knowledge across different domains (i.e., problems with different representations). However, identifying similarities and adapting knowledge across tasks from different domains requires matching their representations or finding domain-invariant features. These processes can be data-demanding, which poses the main challenge in cross-domain knowledge transfer: to select and transform knowledge in a data-efficient way, such that it accelerates learning in the target task, despite the presence of significant differences across problems (e.g., robots with distinct morphologies). Thus, this review presents a unifying analysis of methods focused on transferring knowledge across different domains. 
Through a taxonomy based on a transfer-approach categorization and a characterization of works based on their data-assumption requirements, the contributions of this article are 1) a comprehensive and systematic revision of knowledge transfer methods for the cross-domain RL setting, 2) a categorization and characterization of such methods to provide an analysis based on relevant features such as their transfer approach and data requirements, and 3) a discussion on the main challenges regarding cross-domain knowledge transfer, as well as on ideas of future directions worth exploring to address these problems. </p> </div> </dd> <dt> <a name='item499'>[499]</a> <a href ="/abs/2404.19513" title="Abstract" id="2404.19513"> arXiv:2404.19513 </a> (replaced) [<a href="/pdf/2404.19513" title="Download PDF" id="pdf-2404.19513" aria-labelledby="pdf-2404.19513">pdf</a>, <a href="https://arxiv.org/html/2404.19513v4" title="View HTML" id="html-2404.19513" aria-labelledby="html-2404.19513" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.19513" title="Other formats" id="oth-2404.19513" aria-labelledby="oth-2404.19513">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Smartphone-Based Method for Assessing Tomato Nutrient Status through Trichome Density Measurement </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ueda,+S">Sho Ueda</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ye,+X">Xujun Ye</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Early detection of fertilizer-induced stress in tomato plants is crucial for optimizing crop yield through timely management interventions. 
While conventional optical methods struggle to detect fertilizer stress in young leaves, these leaves contain valuable diagnostic information through their microscopic hair-like structures, particularly trichomes, which existing approaches have overlooked. This study introduces a smartphone-based noninvasive technique that leverages mobile computing and digital imaging capabilities to quantify trichome density on young leaves with superior detection latency. Our method uniquely combines augmented reality technology with image processing algorithms to analyze trichomes transferred onto specialized measurement paper. A robust automated pipeline processes these images through region extraction, perspective transformation, and illumination correction to precisely quantify trichome density. Validation experiments on hydroponically grown tomatoes under varying fertilizer conditions demonstrated the method's effectiveness. Leave-one-out cross-validation revealed strong predictive performance with the area under the precision-recall curve (PR-AUC: 0.82) and area under the receiver operating characteristic curve (ROC-AUC: 0.64), while the predicted and observed trichome densities exhibited high correlation ($r = 0.79$). This innovative approach transforms smartphones into precise diagnostic tools for plant nutrition assessment, offering a practical, cost-effective solution for precision agriculture. 
</p> </div> </dd> <dt> <a name='item500'>[500]</a> <a href ="/abs/2405.01425" title="Abstract" id="2405.01425"> arXiv:2405.01425 </a> (replaced) [<a href="/pdf/2405.01425" title="Download PDF" id="pdf-2405.01425" aria-labelledby="pdf-2405.01425">pdf</a>, <a href="https://arxiv.org/html/2405.01425v2" title="View HTML" id="html-2405.01425" aria-labelledby="html-2405.01425" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.01425" title="Other formats" id="oth-2405.01425" aria-labelledby="oth-2405.01425">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> In-and-Out: Algorithmic Diffusion for Sampling Convex Bodies </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kook,+Y">Yunbum Kook</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vempala,+S+S">Santosh S. Vempala</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+M+S">Matthew S. Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 33 pages. To appear in NeurIPS 2024 (spotlight). Improve Lemma 22 and 26 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Data Structures and Algorithms (cs.DS)</span>; Machine Learning (cs.LG); Statistics Theory (math.ST); Machine Learning (stat.ML) </div> <p class='mathjax'> We present a new random walk for uniformly sampling high-dimensional convex bodies. It achieves state-of-the-art runtime complexity with stronger guarantees on the output than previously known, namely in Rényi divergence (which implies TV, $\mathcal{W}_2$, KL, $\chi^2$). The proof departs from known approaches for polytime algorithms for the problem -- we utilize a stochastic diffusion perspective to show contraction to the target distribution with the rate of convergence determined by functional isoperimetric constants of the stationary density. 
</p> </div> </dd> <dt> <a name='item501'>[501]</a> <a href ="/abs/2405.05567" title="Abstract" id="2405.05567"> arXiv:2405.05567 </a> (replaced) [<a href="/pdf/2405.05567" title="Download PDF" id="pdf-2405.05567" aria-labelledby="pdf-2405.05567">pdf</a>, <a href="https://arxiv.org/html/2405.05567v2" title="View HTML" id="html-2405.05567" aria-labelledby="html-2405.05567" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.05567" title="Other formats" id="oth-2405.05567" aria-labelledby="oth-2405.05567">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Perfect Subset Privacy in Polynomial Computation via Reed-Muller Information Super-sets </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+Z">Zirui Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ramkumar,+V">Vinayak Ramkumar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Raviv,+N">Netanel Raviv</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Extension of ISIT 2024 publication; currently under review </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span> </div> <p class='mathjax'> Delegating large-scale computations to service providers is a common practice which raises privacy concerns. This paper studies information-theoretic privacy-preserving delegation of data to a service provider, who may further delegate the computation to auxiliary worker nodes, in order to compute a polynomial over that data at a later point in time. We study techniques which are compatible with robust management of distributed computation systems, an area known as coded computing. 
Privacy in coded computing, however, has traditionally addressed the problem of colluding workers, and assumed that the server that administrates the computation is trusted. This viewpoint of privacy does not accurately reflect real-world privacy concerns, since normally, the service provider as a whole (i.e., the administrator and the worker nodes) form one cohesive entity which itself poses a privacy risk. This paper aims to shift the focus of privacy in coded computing to safeguarding the privacy of the user against the service provider as a whole, instead of merely against colluding workers inside the service provider. To this end, we leverage the recently defined notion of perfect subset privacy, which guarantees zero information leakage from all subsets of the data up to a certain size. Using known techniques from Reed-Muller decoding, we provide a scheme which enables polynomial computation with perfect subset privacy in straggler-free systems. Furthermore, by studying information super-sets in Reed-Muller codes, which may be of independent interest, we extend the previous scheme to tolerate straggling worker nodes inside the service provider. 
</p> </div> </dd> <dt> <a name='item502'>[502]</a> <a href ="/abs/2405.06375" title="Abstract" id="2405.06375"> arXiv:2405.06375 </a> (replaced) [<a href="/pdf/2405.06375" title="Download PDF" id="pdf-2405.06375" aria-labelledby="pdf-2405.06375">pdf</a>, <a href="https://arxiv.org/html/2405.06375v2" title="View HTML" id="html-2405.06375" aria-labelledby="html-2405.06375" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.06375" title="Other formats" id="oth-2405.06375" aria-labelledby="oth-2405.06375">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Accuracy and Stability of CUR decompositions with Oversampling </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Park,+T">Taejun Park</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Nakatsukasa,+Y">Yuji Nakatsukasa</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 31 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Numerical Analysis (math.NA)</span> </div> <p class='mathjax'> This work investigates the accuracy and numerical stability of CUR decompositions with oversampling. The CUR decomposition approximates a matrix using a subset of columns and rows of the matrix. When the number of columns and the rows are the same, the CUR decomposition can become unstable and less accurate due to the presence of the matrix inverse in the core matrix. Nevertheless, we demonstrate that the CUR decomposition can be implemented in a numerically stable manner and illustrate that oversampling, which increases either the number of columns or rows in the CUR decomposition, can enhance its accuracy and stability. 
Additionally, this work devises an algorithm for oversampling motivated by the theory of the CUR decomposition and the cosine-sine decomposition, whose competitiveness is illustrated through experiments. </p> </div> </dd> <dt> <a name='item503'>[503]</a> <a href ="/abs/2405.07460" title="Abstract" id="2405.07460"> arXiv:2405.07460 </a> (replaced) [<a href="/pdf/2405.07460" title="Download PDF" id="pdf-2405.07460" aria-labelledby="pdf-2405.07460">pdf</a>, <a href="https://arxiv.org/html/2405.07460v4" title="View HTML" id="html-2405.07460" aria-labelledby="html-2405.07460" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.07460" title="Other formats" id="oth-2405.07460" aria-labelledby="oth-2405.07460">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> HoneyBee: A Scalable Modular Framework for Creating Multimodal Oncology Datasets with Foundational Embedding Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tripathi,+A">Aakash Tripathi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Waqas,+A">Asim Waqas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schabath,+M+B">Matthew B. Schabath</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yilmaz,+Y">Yasin Yilmaz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rasool,+G">Ghulam Rasool</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Databases (cs.DB) </div> <p class='mathjax'> Developing accurate machine learning models for oncology requires large-scale, high-quality multimodal datasets. However, creating such datasets remains challenging due to the complexity and heterogeneity of medical data. 
To address this challenge, we introduce HoneyBee, a scalable modular framework for building multimodal oncology datasets that leverages foundation models to generate representative embeddings. HoneyBee integrates various data modalities, including clinical diagnostic and pathology imaging data, medical notes, reports, records, and molecular data. It employs data preprocessing techniques and foundation models to generate embeddings that capture the essential features and relationships within the raw medical data. The generated embeddings are stored in a structured format using Hugging Face datasets and PyTorch dataloaders for accessibility. Vector databases enable efficient querying and retrieval for machine learning applications. We demonstrate the effectiveness of HoneyBee through experiments assessing the quality and representativeness of these embeddings. The framework is designed to be extensible to other medical domains and aims to accelerate oncology research by providing high-quality, machine learning-ready datasets. HoneyBee is an ongoing open-source effort, and the code, datasets, and models are available at the project repository. 
</p> </div> </dd> <dt> <a name='item504'>[504]</a> <a href ="/abs/2405.09223" title="Abstract" id="2405.09223"> arXiv:2405.09223 </a> (replaced) [<a href="/pdf/2405.09223" title="Download PDF" id="pdf-2405.09223" aria-labelledby="pdf-2405.09223">pdf</a>, <a href="https://arxiv.org/html/2405.09223v2" title="View HTML" id="html-2405.09223" aria-labelledby="html-2405.09223" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.09223" title="Other formats" id="oth-2405.09223" aria-labelledby="oth-2405.09223">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Word Alignment as Preference for Machine Translation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Q">Qiyu Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nagata,+M">Masaaki Nagata</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Miao,+Z">Zhongtao Miao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tsuruoka,+Y">Yoshimasa Tsuruoka</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> EMNLP 2024 Main </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The problem of hallucination and omission, a long-standing problem in machine translation (MT), is more pronounced when a large language model (LLM) is used in MT because an LLM itself is susceptible to these phenomena. In this work, we mitigate the problem in an LLM-based MT model by guiding it to better word alignment. We first study the correlation between word alignment and the phenomena of hallucination and omission in MT. Then we propose to utilize word alignment as preference to optimize the LLM-based MT model. 
The preference data are constructed by selecting chosen and rejected translations from multiple MT tools. Subsequently, direct preference optimization is used to optimize the LLM-based model towards the preference signal. Given the absence of evaluators specifically designed for hallucination and omission in MT, we further propose selecting hard instances and utilizing GPT-4 to directly evaluate the performance of the models in mitigating these issues. We verify the rationality of these designed evaluation methods by experiments, followed by extensive results demonstrating the effectiveness of word alignment-based preference optimization to mitigate hallucination and omission. On the other hand, although it shows promise in mitigating hallucination and omission, the overall performance of MT in different language directions remains mixed, with slight increases in BLEU and decreases in COMET. </p> </div> </dd> <dt> <a name='item505'>[505]</a> <a href ="/abs/2405.13337" title="Abstract" id="2405.13337"> arXiv:2405.13337 </a> (replaced) [<a href="/pdf/2405.13337" title="Download PDF" id="pdf-2405.13337" aria-labelledby="pdf-2405.13337">pdf</a>, <a href="https://arxiv.org/html/2405.13337v2" title="View HTML" id="html-2405.13337" aria-labelledby="html-2405.13337" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.13337" title="Other formats" id="oth-2405.13337" aria-labelledby="oth-2405.13337">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Semantic Equitable Clustering: A Simple and Effective Strategy for Clustering Vision Tokens </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Fan,+Q">Qihang Fan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+H">Huaibo Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+M">Mingrui Chen</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=He,+R">Ran He</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> The Vision Transformer (ViT) has gained prominence for its superior relational modeling prowess. However, its global attention mechanism's quadratic complexity poses substantial computational burdens. A common remedy spatially groups tokens for self-attention, reducing computational requirements. Nonetheless, this strategy neglects semantic information in tokens, possibly scattering semantically-linked tokens across distinct groups, thus compromising the efficacy of self-attention intended for modeling inter-token dependencies. Motivated by these insights, we introduce a fast and balanced clustering method, named Semantic Equitable Clustering (SEC). SEC clusters tokens based on their global semantic relevance in an efficient, straightforward manner. In contrast to traditional clustering methods requiring multiple iterations, our method achieves token clustering in a single pass. Additionally, SEC regulates the number of tokens per cluster, ensuring a balanced distribution for effective parallel processing on current computational platforms without necessitating further optimization. Capitalizing on SEC, we propose a versatile vision backbone, SECViT. Comprehensive experiments in image classification, object detection, instance segmentation, and semantic segmentation validate the effectiveness of SECViT. Moreover, SEC can be conveniently and swiftly applied to multimodal large language models (MLLM), such as LLaVA, to serve as a vision language connector, effectively accelerating the model's efficiency while maintaining unchanged or better performance. 
</p> </div> </dd> <dt> <a name='item506'>[506]</a> <a href ="/abs/2405.14342" title="Abstract" id="2405.14342"> arXiv:2405.14342 </a> (replaced) [<a href="/pdf/2405.14342" title="Download PDF" id="pdf-2405.14342" aria-labelledby="pdf-2405.14342">pdf</a>, <a href="https://arxiv.org/html/2405.14342v3" title="View HTML" id="html-2405.14342" aria-labelledby="html-2405.14342" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.14342" title="Other formats" id="oth-2405.14342" aria-labelledby="oth-2405.14342">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> RoGs: Large Scale Road Surface Reconstruction with Meshgrid Gaussian </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Feng,+Z">Zhiheng Feng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+W">Wenhua Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+T">Tianchen Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Hesheng Wang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Road surface reconstruction plays a crucial role in autonomous driving, which can be used for road lane perception and autolabeling. Recently, mesh-based road surface reconstruction algorithms have shown promising reconstruction results. However, these mesh-based methods suffer from slow speed and poor reconstruction quality. To address these limitations, we propose a novel large-scale road surface reconstruction approach with meshgrid Gaussian, named RoGs. Specifically, we model the road surface by placing Gaussian surfels in the vertices of a uniformly distributed square mesh, where each surfel stores color, semantic, and geometric information. 
This square mesh-based layout covers the entire road with fewer Gaussian surfels and reduces the overlap between Gaussian surfels during training. In addition, because the road surface has no thickness, 2D Gaussian surfel is more consistent with the physical reality of the road surface than 3D Gaussian sphere. Then, unlike previous initialization methods that rely on point clouds, we introduce a vehicle pose-based initialization method to initialize the height and rotation of the Gaussian surfel. Thanks to this meshgrid Gaussian modeling and pose-based initialization, our method achieves significant speedups while improving reconstruction quality. We obtain excellent results in reconstruction of road surfaces in a variety of challenging real-world scenes. </p> </div> </dd> <dt> <a name='item507'>[507]</a> <a href ="/abs/2405.15145" title="Abstract" id="2405.15145"> arXiv:2405.15145 </a> (replaced) [<a href="/pdf/2405.15145" title="Download PDF" id="pdf-2405.15145" aria-labelledby="pdf-2405.15145">pdf</a>, <a href="https://arxiv.org/html/2405.15145v3" title="View HTML" id="html-2405.15145" aria-labelledby="html-2405.15145" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.15145" title="Other formats" id="oth-2405.15145" aria-labelledby="oth-2405.15145">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CulturePark: Boosting Cross-cultural Understanding in Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+C">Cheng Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Teney,+D">Damien Teney</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+L">Linyi Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+Q">Qingsong Wen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+X">Xing Xie</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+J">Jindong Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> NeurIPS 2024; Code is released at <a href="https://github.com/Scarelette/CulturePark" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. arXiv admin note: substantial text overlap with <a href="https://arxiv.org/abs/2402.10946" data-arxiv-id="2402.10946" class="link-https">arXiv:2402.10946</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Computation and Language (cs.CL); Multiagent Systems (cs.MA) </div> <p class='mathjax'> Cultural bias is pervasive in many large language models (LLMs), largely due to the deficiency of data representative of different cultures. Typically, cultural datasets and benchmarks are constructed either by extracting subsets of existing datasets or by aggregating from platforms such as Wikipedia and social media. However, these approaches are highly dependent on real-world data and human annotations, making them costly and difficult to scale. Inspired by cognitive theories on social communication, this paper introduces CulturePark, an LLM-powered multi-agent communication framework for cultural data collection. CulturePark simulates cross-cultural human communication with LLM-based agents playing roles in different cultures. It generates high-quality cross-cultural dialogues encapsulating human beliefs, norms, and customs. Using CulturePark, we generated 41,000 cultural samples to fine-tune eight culture-specific LLMs. We evaluated these models across three downstream tasks: content moderation, cultural alignment, and cultural education. Results show that for content moderation, our GPT-3.5-based models either match or outperform GPT-4 on datasets. Regarding cultural alignment, our models surpass GPT-4 on Hofstede's VSM 13 framework. 
Furthermore, for cultural education of human participants, our models demonstrate superior outcomes in both learning efficacy and user experience compared to GPT-4. CulturePark proves an important step in addressing cultural bias and advancing the democratization of AI, highlighting the critical role of culturally inclusive data in model training. Code is released at <a href="https://github.com/Scarelette/CulturePark" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item508'>[508]</a> <a href ="/abs/2405.16200" title="Abstract" id="2405.16200"> arXiv:2405.16200 </a> (replaced) [<a href="/pdf/2405.16200" title="Download PDF" id="pdf-2405.16200" aria-labelledby="pdf-2405.16200">pdf</a>, <a href="https://arxiv.org/html/2405.16200v2" title="View HTML" id="html-2405.16200" aria-labelledby="html-2405.16200" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.16200" title="Other formats" id="oth-2405.16200" aria-labelledby="oth-2405.16200">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FlightPatchNet: Multi-Scale Patch Network with Differential Coding for Flight Trajectory Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+L">Lan Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+X">Xuebin Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chu,+R">Ruijuan Chu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+G">Guangyi Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yingchun Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+J">Jing Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+L">Linyu Wang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer 
Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Accurate multi-step flight trajectory prediction plays an important role in Air Traffic Control, which can ensure the safety of air transportation. Two main issues limit the flight trajectory prediction performance of existing works. The first issue is the negative impact on prediction accuracy caused by the significant differences in data range. The second issue is that real-world flight trajectories involve underlying temporal dependencies, and existing methods fail to reveal the hidden complex temporal variations and only extract features from one single time scale. To address the above issues, we propose FlightPatchNet, a multi-scale patch network with differential coding for flight trajectory prediction. Specifically, FlightPatchNet first utilizes the differential coding to encode the original values of longitude and latitude into first-order differences and generates embeddings for all variables at each time step. Then, a global temporal attention is introduced to explore the dependencies between different time steps. To fully explore the diverse temporal patterns in flight trajectories, a multi-scale patch network is delicately designed to serve as the backbone. The multi-scale patch network exploits stacked patch mixer blocks to capture inter- and intra-patch dependencies under different time scales, and further integrates multi-scale temporal features across different scales and variables. Finally, FlightPatchNet ensembles multiple predictors to make direct multi-step prediction. Extensive experiments on ADS-B datasets demonstrate that our model outperforms the competitive baselines. 
</p> </div> </dd> <dt> <a name='item509'>[509]</a> <a href ="/abs/2405.17158" title="Abstract" id="2405.17158"> arXiv:2405.17158 </a> (replaced) [<a href="/pdf/2405.17158" title="Download PDF" id="pdf-2405.17158" aria-labelledby="pdf-2405.17158">pdf</a>, <a href="https://arxiv.org/html/2405.17158v4" title="View HTML" id="html-2405.17158" aria-labelledby="html-2405.17158" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.17158" title="Other formats" id="oth-2405.17158" aria-labelledby="oth-2405.17158">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PatchScaler: An Efficient Patch-Independent Diffusion Model for Image Super-Resolution </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yong Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dong,+H">Hang Dong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+J">Jinshan Pan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dong,+Q">Qingji Dong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+K">Kai Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+R">Rongxiang Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+L">Lean Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+F">Fei Wang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> While diffusion models significantly improve the perceptual quality of super-resolved images, they usually require a large number of sampling steps, resulting in high computational costs and long inference times. Recent efforts have explored reasonable acceleration schemes by reducing the number of sampling steps. 
However, these approaches treat all regions of the image equally, overlooking the fact that regions with varying levels of reconstruction difficulty require different sampling steps. To address this limitation, we propose PatchScaler, an efficient patch-independent diffusion pipeline for single image super-resolution. Specifically, PatchScaler introduces a Patch-adaptive Group Sampling (PGS) strategy that groups feature patches by quantifying their reconstruction difficulty and establishes shortcut paths with different sampling configurations for each group. To further optimize the patch-level reconstruction process of PGS, we propose a texture prompt that provides rich texture conditional information to the diffusion model. The texture prompt adaptively retrieves texture priors for the target patch from a common reference texture memory. Extensive experiments show that our PatchScaler achieves superior performance in both quantitative and qualitative evaluations, while significantly speeding up inference. Our code will be available at <a href="https://github.com/yongliuy/PatchScaler" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item510'>[510]</a> <a href ="/abs/2405.17410" title="Abstract" id="2405.17410"> arXiv:2405.17410 </a> (replaced) [<a href="/pdf/2405.17410" title="Download PDF" id="pdf-2405.17410" aria-labelledby="pdf-2405.17410">pdf</a>, <a href="https://arxiv.org/html/2405.17410v2" title="View HTML" id="html-2405.17410" aria-labelledby="html-2405.17410" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.17410" title="Other formats" id="oth-2405.17410" aria-labelledby="oth-2405.17410">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The Peripatetic Hater: Predicting Movement Among Hate Subreddits </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hickey,+D">Daniel Hickey</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fessler,+D+M">Daniel M.T. Fessler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lerman,+K">Kristina Lerman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Burghardt,+K">Keith Burghardt</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages, 13 figures. Accepted to the Proceedings of the International AAAI Conference on Web and Social Media 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Social and Information Networks (cs.SI)</span>; Computers and Society (cs.CY); Human-Computer Interaction (cs.HC) </div> <p class='mathjax'> Many online hate groups exist to disparage others based on race, gender identity, sex, or other characteristics. The accessibility of these communities allows users to join multiple types of hate groups (e.g., a racist community and a misogynistic community), raising the question of whether users who join additional types of hate communities could be further radicalized compared to users who stay in one type of hate group. 
However, little is known about the dynamics of joining multiple types of hate groups, nor the effect of these groups on peripatetic users. We develop a new method to classify hate subreddits and the identities they disparage, then apply it to understand better how users come to join different types of hate subreddits. The hate classification technique utilizes human-validated deep learning models to extract the protected identities attacked, if any, across 168 subreddits. We find distinct clusters of subreddits targeting various identities, such as racist subreddits, xenophobic subreddits, and transphobic subreddits. We show that when users become active in their first hate subreddit, they have a high likelihood of becoming active in additional hate subreddits of a different category. We also find that users who join additional hate subreddits, especially those of a different category, develop a wider hate group lexicon. These results then lead us to train a deep learning model that, as we demonstrate, usefully predicts the hate categories in which users will become active based on post text replied to and written. The accuracy of this model may be partly driven by peripatetic users often using the language of hate subreddits they eventually join. Overall, these results highlight the unique risks associated with hate communities on a social media platform, as discussion of alternative targets of hate may lead users to target more protected identities. 
</p> </div> </dd> <dt> <a name='item511'>[511]</a> <a href ="/abs/2405.18299" title="Abstract" id="2405.18299"> arXiv:2405.18299 </a> (replaced) [<a href="/pdf/2405.18299" title="Download PDF" id="pdf-2405.18299" aria-labelledby="pdf-2405.18299">pdf</a>, <a href="https://arxiv.org/html/2405.18299v4" title="View HTML" id="html-2405.18299" aria-labelledby="html-2405.18299" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.18299" title="Other formats" id="oth-2405.18299" aria-labelledby="oth-2405.18299">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Deep Learning Innovations for Underwater Waste Detection: An In-Depth Analysis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Walia,+J+S">Jaskaran Singh Walia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=K,+P+L">Pavithra L K</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Robotics (cs.RO) </div> <p class='mathjax'> Addressing the issue of submerged underwater trash is crucial for safeguarding aquatic ecosystems and preserving marine life. While identifying debris present on the surface of water bodies is straightforward, assessing the underwater submerged waste is a challenge due to the image distortions caused by factors such as light refraction, absorption, suspended particles, color shifts, and occlusion. This paper conducts a comprehensive review of state-of-the-art architectures and on the existing datasets to establish a baseline for submerged waste and trash detection. The primary goal remains to establish the benchmark of the object localization techniques to be leveraged by advanced underwater sensors and autonomous underwater vehicles. 
The ultimate objective is to explore the underwater environment and to identify and remove underwater debris. The absence of benchmarks (dataset or algorithm) in many studies emphasizes the need for a more robust algorithmic solution. Through this research, we aim to give a comparative performance analysis of various underwater trash detection algorithms. </p> </div> </dd> <dt> <a name='item512'>[512]</a> <a href ="/abs/2405.19040" title="Abstract" id="2405.19040"> arXiv:2405.19040 </a> (replaced) [<a href="/pdf/2405.19040" title="Download PDF" id="pdf-2405.19040" aria-labelledby="pdf-2405.19040">pdf</a>, <a href="https://arxiv.org/html/2405.19040v3" title="View HTML" id="html-2405.19040" aria-labelledby="html-2405.19040" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.19040" title="Other formats" id="oth-2405.19040" aria-labelledby="oth-2405.19040">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Finite-Choice Logic Programming </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Martens,+C">Chris Martens</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Simmons,+R+J">Robert J. Simmons</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Arntzenius,+M">Michael Arntzenius</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted for publication at POPL 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Programming Languages (cs.PL)</span>; Logic in Computer Science (cs.LO) </div> <p class='mathjax'> Logic programming, as exemplified by datalog, defines the meaning of a program as its unique smallest model: the deductive closure of its inference rules. 
However, many problems call for an enumeration of models that vary along some set of choices while maintaining structural and logical constraints -- there is no single canonical model. The notion of stable models for logic programs with negation has successfully captured programmer intuition about the set of valid solutions for such problems, giving rise to a family of programming languages and associated solvers known as answer set programming. Unfortunately, the definition of a stable model is frustratingly indirect, especially in the presence of rules containing free variables. <br>We propose a new formalism, finite-choice logic programming, that uses choice, not negation, to admit multiple solutions. Finite-choice logic programming contains all the expressive power of the stable model semantics, gives meaning to a new and useful class of programs, and enjoys a least-fixed-point interpretation over a novel domain. We present an algorithm for exploring the solution space and prove it correct with respect to our semantics. Our implementation, the Dusa logic programming language, has performance that compares favorably with state-of-the-art answer set solvers and exhibits more predictable scaling with problem size. 
</p> </div> </dd> <dt> <a name='item513'>[513]</a> <a href ="/abs/2405.20988" title="Abstract" id="2405.20988"> arXiv:2405.20988 </a> (replaced) [<a href="/pdf/2405.20988" title="Download PDF" id="pdf-2405.20988" aria-labelledby="pdf-2405.20988">pdf</a>, <a href="https://arxiv.org/html/2405.20988v4" title="View HTML" id="html-2405.20988" aria-labelledby="html-2405.20988" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.20988" title="Other formats" id="oth-2405.20988" aria-labelledby="oth-2405.20988">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Communication-Efficient Distributed Deep Learning via Federated Dynamic Averaging </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Theologitis,+M">Michail Theologitis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Frangias,+G">Georgios Frangias</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Anestis,+G">Georgios Anestis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Samoladas,+V">Vasilis Samoladas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deligiannakis,+A">Antonios Deligiannakis</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted as research paper at EDBT 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Distributed, Parallel, and Cluster Computing (cs.DC) </div> <p class='mathjax'> The ever-growing volume and decentralized nature of data, coupled with the need to harness it and extract knowledge, have led to the extensive use of distributed deep learning (DDL) techniques for training. 
These techniques rely on local training performed at distributed nodes using locally collected data, followed by a periodic synchronization process that combines these models to create a unified global model. However, the frequent synchronization of deep learning models, encompassing millions to many billions of parameters, creates a communication bottleneck, severely hindering scalability. Worse yet, DDL algorithms typically waste valuable bandwidth and render themselves less practical in bandwidth-constrained federated settings by relying on overly simplistic, periodic, and rigid synchronization schedules. These inefficiencies make the training process increasingly impractical as they demand excessive time for data communication. To address these shortcomings, we propose Federated Dynamic Averaging (FDA), a communication-efficient DDL strategy that dynamically triggers synchronization based on the value of the model variance. In essence, the costly synchronization step is triggered only if the local models -- initialized from a common global model after each synchronization -- have significantly diverged. This decision is facilitated by the transmission of a small local state from each distributed node. Through extensive experiments across a wide range of learning tasks we demonstrate that FDA reduces communication cost by orders of magnitude, compared to both traditional and cutting-edge communication-efficient algorithms. Additionally, we show that FDA maintains robust performance across diverse data heterogeneity settings. 
</p> </div> </dd> <dt> <a name='item514'>[514]</a> <a href ="/abs/2406.00777" title="Abstract" id="2406.00777"> arXiv:2406.00777 </a> (replaced) [<a href="/pdf/2406.00777" title="Download PDF" id="pdf-2406.00777" aria-labelledby="pdf-2406.00777">pdf</a>, <a href="https://arxiv.org/html/2406.00777v2" title="View HTML" id="html-2406.00777" aria-labelledby="html-2406.00777" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.00777" title="Other formats" id="oth-2406.00777" aria-labelledby="oth-2406.00777">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Diffusion Features to Bridge Domain Gap for Semantic Segmentation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ji,+Y">Yuxiang Ji</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+B">Boyong He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qu,+C">Chenyuan Qu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tan,+Z">Zhuoyue Tan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qin,+C">Chuan Qin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+L">Liaoni Wu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> The code is released at <a href="https://github.com/Yux1angJi/DIFF" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Pre-trained diffusion models have demonstrated remarkable proficiency in synthesizing images across a wide range of scenarios with customizable prompts, indicating their effective capacity to capture universal features. 
Motivated by this, our study delves into the utilization of the implicit knowledge embedded within diffusion models to address challenges in cross-domain semantic segmentation. This paper investigates the approach that leverages the sampling and fusion techniques to harness the features of diffusion models efficiently. We propose DIffusion Feature Fusion (DIFF) as a backbone use for extracting and integrating effective semantic representations through the diffusion process. By leveraging the strength of text-to-image generation capability, we introduce a new training framework designed to implicitly learn posterior knowledge from it. Through rigorous evaluation in the contexts of domain generalization semantic segmentation, we establish that our methodology surpasses preceding approaches in mitigating discrepancies across distinct domains and attains the state-of-the-art (SOTA) benchmark. </p> </div> </dd> <dt> <a name='item515'>[515]</a> <a href ="/abs/2406.04165" title="Abstract" id="2406.04165"> arXiv:2406.04165 </a> (replaced) [<a href="/pdf/2406.04165" title="Download PDF" id="pdf-2406.04165" aria-labelledby="pdf-2406.04165">pdf</a>, <a href="https://arxiv.org/html/2406.04165v2" title="View HTML" id="html-2406.04165" aria-labelledby="html-2406.04165" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.04165" title="Other formats" id="oth-2406.04165" aria-labelledby="oth-2406.04165">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Repurposing Language Models into Embedding Models: Finding the Compute-Optimal Recipe </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ziarko,+A">Alicja Ziarko</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+A+Q">Albert Q. 
Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Piotrowski,+B">Bartosz Piotrowski</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+W">Wenda Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jamnik,+M">Mateja Jamnik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mi%C5%82o%C5%9B,+P">Piotr Miłoś</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> NeurIPS 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Text embeddings are essential for many tasks, such as document retrieval, clustering, and semantic similarity assessment. In this paper, we study how to contrastively train text embedding models in a compute-optimal fashion, given a suite of pre-trained decoder-only language models. Our innovation is an algorithm that produces optimal configurations of model sizes, data quantities, and fine-tuning methods for text-embedding models at different computational budget levels. The resulting recipe, which we obtain through extensive experiments, can be used by practitioners to make informed design choices for their embedding models. Specifically, our findings suggest that full fine-tuning and low-rank adaptation fine-tuning produce optimal models at lower and higher computational budgets respectively. 
</p> </div> </dd> <dt> <a name='item516'>[516]</a> <a href ="/abs/2406.04289" title="Abstract" id="2406.04289"> arXiv:2406.04289 </a> (replaced) [<a href="/pdf/2406.04289" title="Download PDF" id="pdf-2406.04289" aria-labelledby="pdf-2406.04289">pdf</a>, <a href="https://arxiv.org/html/2406.04289v4" title="View HTML" id="html-2406.04289" aria-labelledby="html-2406.04289" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.04289" title="Other formats" id="oth-2406.04289" aria-labelledby="oth-2406.04289">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> What Languages are Easy to Language-Model? A Perspective from Learning Probabilistic Regular Languages </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Borenstein,+N">Nadav Borenstein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Svete,+A">Anej Svete</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chan,+R">Robin Chan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Valvoda,+J">Josef Valvoda</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nowak,+F">Franz Nowak</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Augenstein,+I">Isabelle Augenstein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chodroff,+E">Eleanor Chodroff</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cotterell,+R">Ryan Cotterell</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to ACL 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> What can large language models learn? By definition, language models (LM) are distributions over strings. 
Therefore, an intuitive way of addressing the above question is to formalize it as a matter of learnability of classes of distributions over strings. While prior work in this direction focused on assessing the theoretical limits, in contrast, we seek to understand the empirical learnability. Unlike prior empirical work, we evaluate neural LMs on their home turf -- learning probabilistic languages -- rather than as classifiers of formal languages. In particular, we investigate the learnability of regular LMs (RLMs) by RNN and Transformer LMs. We empirically test the learnability of RLMs as a function of various complexity parameters of the RLM and the hidden state size of the neural LM. We find that the RLM rank, which corresponds to the size of linear space spanned by the logits of its conditional distributions, and the expected length of sampled strings are strong and significant predictors of learnability for both RNNs and Transformers. Several other predictors also reach significance, but with differing patterns between RNNs and Transformers. 
</p> </div> </dd> <dt> <a name='item517'>[517]</a> <a href ="/abs/2406.06371" title="Abstract" id="2406.06371"> arXiv:2406.06371 </a> (replaced) [<a href="/pdf/2406.06371" title="Download PDF" id="pdf-2406.06371" aria-labelledby="pdf-2406.06371">pdf</a>, <a href="https://arxiv.org/html/2406.06371v5" title="View HTML" id="html-2406.06371" aria-labelledby="html-2406.06371" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.06371" title="Other formats" id="oth-2406.06371" aria-labelledby="oth-2406.06371">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> mHuBERT-147: A Compact Multilingual HuBERT Model </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Boito,+M+Z">Marcely Zanon Boito</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Iyer,+V">Vivek Iyer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lagos,+N">Nikolaos Lagos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Besacier,+L">Laurent Besacier</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Calapodescu,+I">Ioan Calapodescu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Extended version of the Interspeech 2024 paper of same name </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> We present mHuBERT-147, the first general-purpose massively multilingual HuBERT speech representation model trained on 90K hours of clean, open-license data. To scale up the multi-iteration HuBERT approach, we use faiss-based clustering, achieving 5.2x faster label assignment than the original method. We also apply a new multilingual batching up-sampling strategy, leveraging both language and dataset diversity. 
After 3 training iterations, our compact 95M parameter mHuBERT-147 outperforms larger models trained on substantially more data. We rank second and first on the ML-SUPERB 10min and 1h leaderboards, with SOTA scores for 3 tasks. Across ASR/LID tasks, our model consistently surpasses XLS-R (300M params; 436K hours) and demonstrates strong competitiveness against the much larger MMS (1B params; 491K hours). Our findings indicate that mHuBERT-147 is a promising model for multilingual speech tasks, offering an unprecedented balance between high performance and parameter efficiency. </p> </div> </dd> <dt> <a name='item518'>[518]</a> <a href ="/abs/2406.07294" title="Abstract" id="2406.07294"> arXiv:2406.07294 </a> (replaced) [<a href="/pdf/2406.07294" title="Download PDF" id="pdf-2406.07294" aria-labelledby="pdf-2406.07294">pdf</a>, <a href="https://arxiv.org/html/2406.07294v2" title="View HTML" id="html-2406.07294" aria-labelledby="html-2406.07294" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.07294" title="Other formats" id="oth-2406.07294" aria-labelledby="oth-2406.07294">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> OTO Planner: An Efficient Only Travelling Once Exploration Planner for Complex and Unknown Environments </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+B">Bo Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+C">Chuanzhao Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+Y">Yan Pan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+F">Fu Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Autonomous exploration in complex and cluttered environments is essential for various 
applications. However, there are many challenges due to the lack of global heuristic information. Existing exploration methods suffer from the repeated paths and considerable computational resource requirement in large-scale environments. To address the above issues, this letter proposes an efficient exploration planner that reduces repeated paths in complex environments, hence it is called "Only Travelling Once Planner". OTO Planner includes fast frontier updating, viewpoint evaluation and viewpoint refinement. A selective frontier updating mechanism is designed, saving a large amount of computational resources. In addition, a novel viewpoint evaluation system is devised to reduce the repeated paths utilizing the enclosed sub-region detection. Besides, a viewpoint refinement approach is raised to concentrate the redundant viewpoints, leading to smoother paths. We conduct extensive simulation and real-world experiments to validate the proposed method. Compared to the state-of-the-art approach, the proposed method reduces the exploration time and movement distance by 10%-20% and improves the speed of frontier detection by 6-9 times. 
</p> </div> </dd> <dt> <a name='item519'>[519]</a> <a href ="/abs/2406.07472" title="Abstract" id="2406.07472"> arXiv:2406.07472 </a> (replaced) [<a href="/pdf/2406.07472" title="Download PDF" id="pdf-2406.07472" aria-labelledby="pdf-2406.07472">pdf</a>, <a href="/format/2406.07472" title="Other formats" id="oth-2406.07472" aria-labelledby="oth-2406.07472">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> 4Real: Towards Photorealistic 4D Scene Generation via Video Diffusion Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+H">Heng Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+C">Chaoyang Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhuang,+P">Peiye Zhuang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Menapace,+W">Willi Menapace</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Siarohin,+A">Aliaksandr Siarohin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+J">Junli Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jeni,+L+A">Laszlo A Jeni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tulyakov,+S">Sergey Tulyakov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+H">Hsin-Ying Lee</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> NeurIPS 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Existing dynamic scene generation methods mostly rely on distilling knowledge from pre-trained 3D generative models, which are typically fine-tuned on synthetic object datasets. As a result, the generated scenes are often object-centric and lack photorealism. 
To address these limitations, we introduce a novel pipeline designed for photorealistic text-to-4D scene generation, discarding the dependency on multi-view generative models and instead fully utilizing video generative models trained on diverse real-world datasets. Our method begins by generating a reference video using the video generation model. We then learn the canonical 3D representation of the video using a freeze-time video, delicately generated from the reference video. To handle inconsistencies in the freeze-time video, we jointly learn a per-frame deformation to model these imperfections. We then learn the temporal deformation based on the canonical representation to capture dynamic interactions in the reference video. The pipeline facilitates the generation of dynamic scenes with enhanced photorealism and structural integrity, viewable from multiple perspectives, thereby setting a new standard in 4D scene generation. </p> </div> </dd> <dt> <a name='item520'>[520]</a> <a href ="/abs/2406.08222" title="Abstract" id="2406.08222"> arXiv:2406.08222 </a> (replaced) [<a href="/pdf/2406.08222" title="Download PDF" id="pdf-2406.08222" aria-labelledby="pdf-2406.08222">pdf</a>, <a href="/format/2406.08222" title="Other formats" id="oth-2406.08222" aria-labelledby="oth-2406.08222">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Sociotechnical Lens for Evaluating Computer Vision Models: A Case Study on Detecting and Reasoning about Gender and Emotion </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+S">Sha Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+S+J">Sang Jung Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Duan,+Z">Zening Duan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+K">Kaiping Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span 
class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Computers and Society (cs.CY); Human-Computer Interaction (cs.HC) </div> <p class='mathjax'> In the evolving landscape of computer vision (CV) technologies, the automatic detection and interpretation of gender and emotion in images is a critical area of study. This paper investigates social biases in CV models, emphasizing the limitations of traditional evaluation metrics such as precision, recall, and accuracy. These metrics often fall short in capturing the complexities of gender and emotion, which are fluid and culturally nuanced constructs. Our study proposes a sociotechnical framework for evaluating CV models, incorporating both technical performance measures and considerations of social fairness. Using a dataset of 5,570 images related to vaccination and climate change, we empirically compared the performance of various CV models, including traditional models like DeepFace and FER, and generative models like GPT-4 Vision. Our analysis involved manually validating the gender and emotional expressions in a subset of images to serve as benchmarks. Our findings reveal that while GPT-4 Vision outperforms other models in technical accuracy for gender classification, it exhibits discriminatory biases, particularly in response to transgender and non-binary personas. Furthermore, the model's emotion detection skews heavily towards positive emotions, with a notable bias towards associating female images with happiness, especially when prompted by male personas. These findings underscore the necessity of developing more comprehensive evaluation criteria that address both validity and discriminatory biases in CV models. Our proposed framework provides guidelines for researchers to critically assess CV tools, ensuring their application in communication research is both ethical and effective. 
The significant contribution of this study lies in its emphasis on a sociotechnical approach, advocating for CV technologies that support social good and mitigate biases rather than perpetuate them. </p> </div> </dd> <dt> <a name='item521'>[521]</a> <a href ="/abs/2406.08298" title="Abstract" id="2406.08298"> arXiv:2406.08298 </a> (replaced) [<a href="/pdf/2406.08298" title="Download PDF" id="pdf-2406.08298" aria-labelledby="pdf-2406.08298">pdf</a>, <a href="https://arxiv.org/html/2406.08298v5" title="View HTML" id="html-2406.08298" aria-labelledby="html-2406.08298" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.08298" title="Other formats" id="oth-2406.08298" aria-labelledby="oth-2406.08298">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> AdaNCA: Neural Cellular Automata As Adaptors For More Robust Vision Transformer </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+Y">Yitao Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+T">Tong Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=S%C3%BCsstrunk,+S">Sabine Süsstrunk</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 32 pages, 12 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Vision Transformers (ViTs) demonstrate remarkable performance in image classification through visual-token interaction learning, particularly when equipped with local information via region attention or convolutions. Although such architectures improve the feature aggregation from different granularities, they often fail to contribute to the robustness of the networks. 
Neural Cellular Automata (NCA) enables the modeling of global visual-token representations through local interactions, with its training strategies and architecture design conferring strong generalization ability and robustness against noisy input. In this paper, we propose Adaptor Neural Cellular Automata (AdaNCA) for Vision Transformers that uses NCA as plug-and-play adaptors between ViT layers, thus enhancing ViT's performance and robustness against adversarial samples as well as out-of-distribution inputs. To overcome the large computational overhead of standard NCAs, we propose Dynamic Interaction for more efficient interaction learning. Using our analysis of AdaNCA placement and robustness improvement, we also develop an algorithm for identifying the most effective insertion points for AdaNCA. With less than a 3% increase in parameters, AdaNCA contributes to more than 10% absolute improvement in accuracy under adversarial attacks on the ImageNet1K benchmark. Moreover, we demonstrate with extensive evaluations across eight robustness benchmarks and four ViT architectures that AdaNCA, as a plug-and-play module, consistently improves the robustness of ViTs. 
</p> </div> </dd> <dt> <a name='item522'>[522]</a> <a href ="/abs/2406.08787" title="Abstract" id="2406.08787"> arXiv:2406.08787 </a> (replaced) [<a href="/pdf/2406.08787" title="Download PDF" id="pdf-2406.08787" aria-labelledby="pdf-2406.08787">pdf</a>, <a href="https://arxiv.org/html/2406.08787v2" title="View HTML" id="html-2406.08787" aria-labelledby="html-2406.08787" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.08787" title="Other formats" id="oth-2406.08787" aria-labelledby="oth-2406.08787">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Survey on Compositional Learning of AI Models: Theoretical and Experimental Practices </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sinha,+S">Sania Sinha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Premsri,+T">Tanawan Premsri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kordjamshidi,+P">Parisa Kordjamshidi</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Transactions of Machine Learning Research, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span> </div> <p class='mathjax'> Compositional learning, mastering the ability to combine basic concepts and construct more intricate ones, is crucial for human cognition, especially in human language comprehension and visual perception. This notion is tightly connected to generalization over unobserved situations. Despite its integral role in intelligence, there is a lack of systematic theoretical and experimental research methodologies, making it difficult to analyze the compositional learning abilities of computational models. In this paper, we survey the literature on compositional learning of AI models and the connections made to cognitive studies. 
We identify abstract concepts of compositionality in cognitive and linguistic studies and connect these to the computational challenges faced by language and vision models in compositional reasoning. We overview the formal definitions, tasks, evaluation benchmarks, various computational models, and theoretical findings. Our primary focus is on linguistic benchmarks and combining language and vision, though there is a large amount of research on compositional concept learning in the computer vision community alone. We cover modern studies on large language models to provide a deeper understanding of the cutting-edge compositional capabilities exhibited by state-of-the-art AI models and pinpoint important directions for future research. </p> </div> </dd> <dt> <a name='item523'>[523]</a> <a href ="/abs/2406.10079" title="Abstract" id="2406.10079"> arXiv:2406.10079 </a> (replaced) [<a href="/pdf/2406.10079" title="Download PDF" id="pdf-2406.10079" aria-labelledby="pdf-2406.10079">pdf</a>, <a href="https://arxiv.org/html/2406.10079v3" title="View HTML" id="html-2406.10079" aria-labelledby="html-2406.10079" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.10079" title="Other formats" id="oth-2406.10079" aria-labelledby="oth-2406.10079">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Localizing Events in Videos with Multimodal Queries </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+G">Gengyuan Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fok,+M+L+A">Mang Ling Ada Fok</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+J">Jialu Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xia,+Y">Yan Xia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cremers,+D">Daniel Cremers</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Torr,+P">Philip 
Torr</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tresp,+V">Volker Tresp</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gu,+J">Jindong Gu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 20 pages (including references and appendix); for the project homepage, see <a href="https://icq-benchmark.github.io/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Localizing events in videos based on semantic queries is a pivotal task in video understanding, with the growing significance of user-oriented applications like video search. Yet, current research predominantly relies on natural language queries (NLQs), overlooking the potential of using multimodal queries (MQs) that integrate images to more flexibly represent semantic queries -- especially when it is difficult to express non-verbal or unfamiliar concepts in words. To bridge this gap, we introduce ICQ, a new benchmark designed for localizing events in videos with MQs, alongside an evaluation dataset ICQ-Highlight. To accommodate and evaluate existing video localization models for this new task, we propose 3 Multimodal Query Adaptation methods and a novel Surrogate Fine-tuning on pseudo-MQs strategy. ICQ systematically benchmarks 12 state-of-the-art backbone models, spanning from specialized video localization models to Video LLMs, across diverse application domains. Our experiments highlight the high potential of MQs in real-world applications. We believe this benchmark is a first step toward advancing MQs in video event localization. 
</p> </div> </dd> <dt> <a name='item524'>[524]</a> <a href ="/abs/2406.10916" title="Abstract" id="2406.10916"> arXiv:2406.10916 </a> (replaced) [<a href="/pdf/2406.10916" title="Download PDF" id="pdf-2406.10916" aria-labelledby="pdf-2406.10916">pdf</a>, <a href="https://arxiv.org/html/2406.10916v2" title="View HTML" id="html-2406.10916" aria-labelledby="html-2406.10916" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.10916" title="Other formats" id="oth-2406.10916" aria-labelledby="oth-2406.10916">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> M-SET: Multi-Drone Swarm Intelligence Experimentation with Collision Avoidance Realism </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Qin,+C">Chuhao Qin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Robins,+A">Alexander Robins</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lillywhite-Roake,+C">Callum Lillywhite-Roake</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pearce,+A">Adam Pearce</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mehta,+H">Hritik Mehta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=James,+S">Scott James</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wong,+T+H">Tsz Ho Wong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pournaras,+E">Evangelos Pournaras</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7 pages, 7 figures. 
This work has been accepted by 2024 IEEE 49th Conference on Local Computer Networks (LCN) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Distributed, Parallel, and Cluster Computing (cs.DC) </div> <p class='mathjax'> Distributed sensing by cooperative drone swarms is crucial for several Smart City applications, such as traffic monitoring and disaster response. Using an indoor lab with inexpensive drones, a testbed supports complex and ambitious studies on these systems while maintaining low cost, rigor, and external validity. This paper introduces the Multi-drone Sensing Experimentation Testbed (M-SET), a novel platform designed to prototype, develop, test, and evaluate distributed sensing with swarm intelligence. M-SET addresses the limitations of existing testbeds that fail to emulate collisions, thus lacking realism in outdoor environments. By integrating a collision avoidance method based on a potential field algorithm, M-SET ensures collision-free navigation and sensing, further optimized via a multi-agent collective learning algorithm. Extensive evaluation demonstrates accurate energy consumption estimation and a low risk of collisions, providing a robust proof-of-concept. New insights show that M-SET has significant potential to support ambitious research with minimal cost, simplicity, and high sensing quality. 
</p> </div> </dd> <dt> <a name='item525'>[525]</a> <a href ="/abs/2406.11919" title="Abstract" id="2406.11919"> arXiv:2406.11919 </a> (replaced) [<a href="/pdf/2406.11919" title="Download PDF" id="pdf-2406.11919" aria-labelledby="pdf-2406.11919">pdf</a>, <a href="https://arxiv.org/html/2406.11919v2" title="View HTML" id="html-2406.11919" aria-labelledby="html-2406.11919" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.11919" title="Other formats" id="oth-2406.11919" aria-labelledby="oth-2406.11919">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Graph Knowledge Distillation to Mixture of Experts </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Rumiantsev,+P">Pavel Rumiantsev</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Coates,+M">Mark Coates</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Machine Learning (stat.ML) </div> <p class='mathjax'> In terms of accuracy, Graph Neural Networks (GNNs) are the best architectural choice for the node classification task. Their drawback in real-world deployment is the latency that emerges from the neighbourhood processing operation. One solution to the latency issue is to perform knowledge distillation from a trained GNN to a Multi-Layer Perceptron (MLP), where the MLP processes only the features of the node being classified (and possibly some pre-computed structural information). However, the performance of such MLPs in both transductive and inductive settings remains inconsistent for existing knowledge distillation techniques. We propose to address the performance concerns by using a specially-designed student model instead of an MLP. 
Our model, named Routing-by-Memory (RbM), is a form of Mixture-of-Experts (MoE), with a design that enforces expert specialization. By encouraging each expert to specialize on a certain region on the hidden representation space, we demonstrate experimentally that it is possible to derive considerably more consistent performance across multiple datasets. Code available at <a href="https://github.com/Rufaim/routing-by-memory" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item526'>[526]</a> <a href ="/abs/2406.12031" title="Abstract" id="2406.12031"> arXiv:2406.12031 </a> (replaced) [<a href="/pdf/2406.12031" title="Download PDF" id="pdf-2406.12031" aria-labelledby="pdf-2406.12031">pdf</a>, <a href="https://arxiv.org/html/2406.12031v2" title="View HTML" id="html-2406.12031" aria-labelledby="html-2406.12031" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.12031" title="Other formats" id="oth-2406.12031" aria-labelledby="oth-2406.12031">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Large Scale Transfer Learning for Tabular Data via Language Modeling </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gardner,+J">Josh Gardner</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Perdomo,+J+C">Juan C. 
Perdomo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schmidt,+L">Ludwig Schmidt</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> NeurIPS 2024 camera-ready updates </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL) </div> <p class='mathjax'> Tabular data -- structured, heterogeneous, spreadsheet-style data with rows and columns -- is widely used in practice across many domains. However, while recent foundation models have reduced the need for developing task-specific datasets and predictors in domains such as language modeling and computer vision, this transfer learning paradigm has not had similar impact in the tabular domain. In this work, we seek to narrow this gap and present TabuLa-8B, a language model for tabular prediction. We define a process for extracting a large, high-quality training dataset from the TabLib corpus, proposing methods for tabular data filtering and quality control. Using the resulting dataset, which comprises over 2.1B rows from over 4M unique tables, we fine-tune a Llama 3-8B large language model (LLM) for tabular data prediction (classification and binned regression) using a novel packing and attention scheme for tabular prediction. Through evaluation across a test suite of 329 datasets, we find that TabuLa-8B has zero-shot accuracy on unseen tables that is over 15 percentage points (pp) higher than random guessing, a feat that is not possible with existing state-of-the-art tabular prediction models (e.g. XGBoost, TabPFN). In the few-shot setting (1-32 shots), without any fine-tuning on the target datasets, TabuLa-8B is 5-15 pp more accurate than XGBoost and TabPFN models that are explicitly trained on equal, or even up to 16x more data. We release our model, code, and data along with the publication of this paper. 
</p> </div> </dd> <dt> <a name='item527'>[527]</a> <a href ="/abs/2406.12356" title="Abstract" id="2406.12356"> arXiv:2406.12356 </a> (replaced) [<a href="/pdf/2406.12356" title="Download PDF" id="pdf-2406.12356" aria-labelledby="pdf-2406.12356">pdf</a>, <a href="https://arxiv.org/html/2406.12356v3" title="View HTML" id="html-2406.12356" aria-labelledby="html-2406.12356" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.12356" title="Other formats" id="oth-2406.12356" aria-labelledby="oth-2406.12356">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Gradient Accumulation Method for Dense Retriever under Memory Constraint </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+J">Jaehee Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+Y">Yukyung Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kang,+P">Pilsung Kang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Retrieval (cs.IR)</span> </div> <p class='mathjax'> InfoNCE loss is commonly used to train dense retriever in information retrieval tasks. It is well known that a large batch is essential to stable and effective training with InfoNCE loss, which requires significant hardware resources. Due to the dependency of large batch, dense retriever has bottleneck of application and research. Recently, memory reduction methods have been broadly adopted to resolve the hardware bottleneck by decomposing forward and backward or using a memory bank. However, current methods still suffer from slow and unstable training. To address these issues, we propose Contrastive Accumulation (ContAccum), a stable and efficient memory reduction method for dense retriever trains that uses a dual memory bank structure to leverage previously generated query and passage representations. 
Experiments on widely used five information retrieval datasets indicate that ContAccum can surpass not only existing memory reduction methods but also high-resource scenario. Moreover, theoretical analysis and experimental results confirm that ContAccum provides more stable dual-encoder training than current memory bank utilization methods. </p> </div> </dd> <dt> <a name='item528'>[528]</a> <a href ="/abs/2406.12907" title="Abstract" id="2406.12907"> arXiv:2406.12907 </a> (replaced) [<a href="/pdf/2406.12907" title="Download PDF" id="pdf-2406.12907" aria-labelledby="pdf-2406.12907">pdf</a>, <a href="https://arxiv.org/html/2406.12907v3" title="View HTML" id="html-2406.12907" aria-labelledby="html-2406.12907" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.12907" title="Other formats" id="oth-2406.12907" aria-labelledby="oth-2406.12907">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Reconciling Kaplan and Chinchilla Scaling Laws </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pearce,+T">Tim Pearce</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+J">Jinyeop Song</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published in TMLR 2024 </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> TMLR 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> Kaplan et al. [2020] (`Kaplan') and Hoffmann et al. [2022] (`Chinchilla') studied the scaling behavior of transformers trained on next-token language prediction. These studies produced different estimates for how the number of parameters ($N$) and training tokens ($D$) should be set to achieve the lowest possible loss for a given compute budget ($C$). 
Kaplan: $N_\text{optimal} \propto C^{0.73}$, Chinchilla: $N_\text{optimal} \propto C^{0.50}$. This paper finds that much of this discrepancy can be attributed to Kaplan counting non-embedding rather than total parameters, combined with their analysis being performed at small scale. Simulating the Chinchilla study under these conditions produces biased scaling coefficients close to Kaplan's. Hence, this paper reaffirms Chinchilla's scaling coefficients, by explaining the primary cause of Kaplan's original overestimation. As a second contribution, the paper explains differences in the reported relationships between loss and compute. These findings lead us to recommend that future scaling studies use total parameters and compute. </p> </div> </dd> <dt> <a name='item529'>[529]</a> <a href ="/abs/2406.17335" title="Abstract" id="2406.17335"> arXiv:2406.17335 </a> (replaced) [<a href="/pdf/2406.17335" title="Download PDF" id="pdf-2406.17335" aria-labelledby="pdf-2406.17335">pdf</a>, <a href="https://arxiv.org/html/2406.17335v2" title="View HTML" id="html-2406.17335" aria-labelledby="html-2406.17335" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.17335" title="Other formats" id="oth-2406.17335" aria-labelledby="oth-2406.17335">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Thorough Performance Benchmarking on Lightweight Embedding-based Recommender Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tran,+H+V">Hung Vinh Tran</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+T">Tong Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+Q+V+H">Quoc Viet Hung Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+Z">Zi Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cui,+L">Lizhen Cui</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+H">Hongzhi Yin</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Retrieval (cs.IR)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Since the creation of the Web, recommender systems (RSs) have been an indispensable mechanism in information filtering. State-of-the-art RSs primarily depend on categorical features, which are encoded by embedding vectors, resulting in excessively large embedding tables. To prevent over-parameterized embedding tables from harming scalability, both academia and industry have seen increasing efforts in compressing RS embeddings. However, despite the prosperity of lightweight embedding-based RSs (LERSs), a wide diversity is seen in evaluation protocols, resulting in obstacles when relating LERS performance to real-world usability. Moreover, despite the common goal of lightweight embeddings, LERSs are evaluated with a single choice between the two main recommendation tasks -- collaborative filtering and content-based recommendation. This lack of discussions on cross-task transferability hinders the development of unified, more scalable solutions. Motivated by these issues, this study investigates various LERSs' performance, efficiency, and cross-task transferability via a thorough benchmarking process. Additionally, we propose an efficient embedding compression method using magnitude pruning, which is an easy-to-deploy yet highly competitive baseline that outperforms various complex LERSs. Our study reveals the distinct performance of LERSs across the two tasks, shedding light on their effectiveness and generalizability. To support edge-based recommendations, we tested all LERSs on a Raspberry Pi 4, where the efficiency bottleneck is exposed. 
Finally, we conclude this paper with critical summaries of LERS performance, model selection suggestions, and underexplored challenges around LERSs for future research. To encourage future research, we publish source codes and artifacts at <a href="https://github.com/chenxing1999/recsys-benchmark" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item530'>[530]</a> <a href ="/abs/2406.19430" title="Abstract" id="2406.19430"> arXiv:2406.19430 </a> (replaced) [<a href="/pdf/2406.19430" title="Download PDF" id="pdf-2406.19430" aria-labelledby="pdf-2406.19430">pdf</a>, <a href="/format/2406.19430" title="Other formats" id="oth-2406.19430" aria-labelledby="oth-2406.19430">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Invitation to Local Algorithms </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Rozho%C5%88,+V">Václav Rozhoň</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Distributed, Parallel, and Cluster Computing (cs.DC)</span>; Data Structures and Algorithms (cs.DS); Combinatorics (math.CO) </div> <p class='mathjax'> This text provides an introduction to distributed local algorithms -- an area at the intersection of theoretical computer science and discrete mathematics. We collect recent results in the area and demonstrate how they lead to a clean theory. We also discuss many connections of local algorithms to fields such as parallel, distributed, and sublinear algorithms, or descriptive combinatorics. 
</p> </div> </dd> <dt> <a name='item531'>[531]</a> <a href ="/abs/2407.01865" title="Abstract" id="2407.01865"> arXiv:2407.01865 </a> (replaced) [<a href="/pdf/2407.01865" title="Download PDF" id="pdf-2407.01865" aria-labelledby="pdf-2407.01865">pdf</a>, <a href="https://arxiv.org/html/2407.01865v2" title="View HTML" id="html-2407.01865" aria-labelledby="html-2407.01865" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.01865" title="Other formats" id="oth-2407.01865" aria-labelledby="oth-2407.01865">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Geometric Static Modeling Framework for Piecewise-Continuous Curved-Link Multi Point-of-Contact Tensegrity Robots </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ervin,+L">Lauren Ervin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vikas,+V">Vishesh Vikas</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This work is published on IEEE RA-L. Please refer to the published article below: <a href="https://ieeexplore.ieee.org/document/10734217" rel="external noopener nofollow" class="link-external link-https">this https URL</a> L. Ervin and V. Vikas, "Geometric Static Modeling Framework for Piecewise-Continuous Curved-Link Multi Point-of-Contact Tensegrity Robots," in IEEE Robotics and Automation Letters, vol. 9, no. 12, pp. 11066-11073, Dec. 2024, doi: <a href="https://doi.org/10.1109/LRA.2024.3486199" data-doi="10.1109/LRA.2024.3486199" class="link-https link-external" rel="external noopener nofollow">https://doi.org/10.1109/LRA.2024.3486199</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> in IEEE Robotics and Automation Letters, vol. 9, pp. 
11066-11073, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Tensegrities synergistically combine tensile (cable) and rigid (link) elements to achieve structural integrity, making them lightweight, packable, and impact resistant. Consequently, they have high potential for locomotion in unstructured environments. This research presents geometric modeling of a Tensegrity eXploratory Robot (TeXploR) comprised of two semi-circular, curved links held together by 12 prestressed cables and actuated with an internal mass shifting along each link. This design allows for efficient rolling with stability (e.g., tip-over on an incline). However, the unique design poses static and dynamic modeling challenges given the discontinuous nature of the semi-circular, curved links, two changing points of contact with the surface plane, and instantaneous movement of the masses along the links. The robot is modeled using a geometric approach where the holonomic constraints confirm the experimentally observed four-state hybrid system, proving TeXploR rolls along one link while pivoting about the end of the other. It also identifies the quasi-static state transition boundaries that enable a continuous change in the robot states via internal mass shifting. This is the first time in literature a non-spherical two-point contact system is kinematically and geometrically modeled. Furthermore, the static solutions are closed-form and do not require numerical exploration of the solution. The MATLAB simulations are experimentally validated on a tetherless prototype with mean absolute error of 4.36°. 
</p> </div> </dd> <dt> <a name='item532'>[532]</a> <a href ="/abs/2407.05771" title="Abstract" id="2407.05771"> arXiv:2407.05771 </a> (replaced) [<a href="/pdf/2407.05771" title="Download PDF" id="pdf-2407.05771" aria-labelledby="pdf-2407.05771">pdf</a>, <a href="https://arxiv.org/html/2407.05771v3" title="View HTML" id="html-2407.05771" aria-labelledby="html-2407.05771" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.05771" title="Other formats" id="oth-2407.05771" aria-labelledby="oth-2407.05771">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi-times Monte Carlo Rendering for Inter-reflection Reconstruction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+T">Tengjie Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Z">Zhuo Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+J">Jingnan Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yan,+Y">Yichao Yan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+X">Xiaokang Yang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages,6 figures, Accepted by NeurIPS 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Inverse rendering methods have achieved remarkable performance in reconstructing high-fidelity 3D objects with disentangled geometries, materials, and environmental light. However, they still face huge challenges in reflective surface reconstruction. Although recent methods model the light trace to learn specularity, the ignorance of indirect illumination makes it hard to handle inter-reflections among multiple smooth objects. 
In this work, we propose Ref-MC2 that introduces the multi-time Monte Carlo sampling which comprehensively computes the environmental illumination and meanwhile considers the reflective light from object surfaces. To address the computation challenge as the times of Monte Carlo sampling grow, we propose a specularity-adaptive sampling strategy, significantly reducing the computational complexity. Besides the computational resource, higher geometry accuracy is also required because geometric errors accumulate multiple times. Therefore, we further introduce a reflection-aware surface model to initialize the geometry and refine it during inverse rendering. We construct a challenging dataset containing scenes with multiple objects and inter-reflections. Experiments show that our method outperforms other inverse rendering methods on various object groups. We also show downstream applications, e.g., relighting and material editing, to illustrate the disentanglement ability of our method. </p> </div> </dd> <dt> <a name='item533'>[533]</a> <a href ="/abs/2407.07315" title="Abstract" id="2407.07315"> arXiv:2407.07315 </a> (replaced) [<a href="/pdf/2407.07315" title="Download PDF" id="pdf-2407.07315" aria-labelledby="pdf-2407.07315">pdf</a>, <a href="https://arxiv.org/html/2407.07315v2" title="View HTML" id="html-2407.07315" aria-labelledby="html-2407.07315" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.07315" title="Other formats" id="oth-2407.07315" aria-labelledby="oth-2407.07315">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CosmoCLIP: Generalizing Large Vision-Language Models for Astronomical Imaging </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Imam,+R">Raza Imam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Alam,+M+T">Mohammed Talha Alam</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Rahman,+U">Umaima Rahman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guizani,+M">Mohsen Guizani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Karray,+F">Fakhri Karray</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at SPAICE Conference, ECSAT, UK, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Existing vision-text contrastive learning models enhance representation transferability and support zero-shot prediction by matching paired image and caption embeddings while pushing unrelated pairs apart. However, astronomical image-label datasets are significantly smaller compared to general image and label datasets available from the internet. We introduce CosmoCLIP, an astronomical image-text contrastive learning framework precisely fine-tuned on the pre-trained CLIP model using SpaceNet and BLIP-based captions. SpaceNet, attained via FLARE, constitutes ~13k optimally distributed images, while BLIP acts as a rich knowledge extractor. The rich semantics derived from this SpaceNet and BLIP descriptions, when learned contrastively, enable CosmoCLIP to achieve superior generalization across various in-domain and out-of-domain tasks. Our results demonstrate that CosmoCLIP is a straightforward yet powerful framework, significantly outperforming CLIP in zero-shot classification and image-text retrieval tasks. 
</p> </div> </dd> <dt> <a name='item534'>[534]</a> <a href ="/abs/2407.10548" title="Abstract" id="2407.10548"> arXiv:2407.10548 </a> (replaced) [<a href="/pdf/2407.10548" title="Download PDF" id="pdf-2407.10548" aria-labelledby="pdf-2407.10548">pdf</a>, <a href="https://arxiv.org/html/2407.10548v3" title="View HTML" id="html-2407.10548" aria-labelledby="html-2407.10548" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.10548" title="Other formats" id="oth-2407.10548" aria-labelledby="oth-2407.10548">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Fluid Antenna Multiple Access Assisted Integrated Data and Energy Transfer: Outage and Multiplexing Gain Analysis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+X">Xiao Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Y">Yizhe Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+H">Halvin Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+J">Jie Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wong,+K">Kai-Kit Wong</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> submitted to IEEE journal for possible publication </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span> </div> <p class='mathjax'> Fluid antenna multiple access (FAMA) exploits the spatial opportunities in wireless channels to overcome multiuser interference by position (a.k.a. port) switching, which can achieve better performance compared to traditional fixed multiple-input multiple-output (MIMO) systems. Additionally, integrated data and energy transfer (IDET) is capable of providing both wireless data transfer (WDT) and wireless energy transfer (WET) services towards low-power devices. 
In this paper, a FAMA-assisted IDET system is investigated, where a base station (BS) equipped with $N$ fixed antennas provides dedicated IDET services towards $N$ user equipments (UEs). Each UE is equipped with a single fluid antenna, while the power splitting (PS) approach is conceived for coordinating WDT and WET. The outage probabilities of both WDT and WET are derived and approximated into closed-forms, where the fluid antenna (FA) at each UE selects the optimal port to achieve the maximum signal-to-interference-plus-noise ratio (SINR) or the energy harvesting power (EHP). The IDET outage probabilities are defined and subsequently derived and approximated into closed-forms. Further, multiplexing gains of the proposed system are defined and analyzed to evaluate the performance. Numerical results validate the theoretical analysis, while also illustrating that the trade-off is achieved between WDT and WET performance by exploiting different port selection strategies. Furthermore, the number of UEs should be optimized to achieve better IDET performance of the system. 
</p> </div> </dd> <dt> <a name='item535'>[535]</a> <a href ="/abs/2407.11424" title="Abstract" id="2407.11424"> arXiv:2407.11424 </a> (replaced) [<a href="/pdf/2407.11424" title="Download PDF" id="pdf-2407.11424" aria-labelledby="pdf-2407.11424">pdf</a>, <a href="https://arxiv.org/html/2407.11424v2" title="View HTML" id="html-2407.11424" aria-labelledby="html-2407.11424" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.11424" title="Other formats" id="oth-2407.11424" aria-labelledby="oth-2407.11424">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Model Inversion Attacks Through Target-Specific Conditional Diffusion Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+O">Ouxiang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hao,+Y">Yanbin Hao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zhicai Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+B">Bin Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+S">Shuo Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Z">Zaixi Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Feng,+F">Fuli Feng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Model inversion attacks (MIAs) aim to reconstruct private images from a target classifier's training set, thereby raising privacy concerns in AI applications. Previous GAN-based MIAs tend to suffer from inferior generative fidelity due to GAN's inherent flaws and biased optimization within latent space. To alleviate these issues, leveraging on diffusion models' remarkable synthesis capabilities, we propose Diffusion-based Model Inversion (Diff-MI) attacks. 
Specifically, we introduce a novel target-specific conditional diffusion model (CDM) to purposely approximate target classifier's private distribution and achieve superior accuracy-fidelity balance. Our method involves a two-step learning paradigm. Step-1 incorporates the target classifier into the entire CDM learning under a pretrain-then-finetune fashion, with creating pseudo-labels as model conditions in pretraining and adjusting specified layers with image predictions in fine-tuning. Step-2 presents an iterative image reconstruction method, further enhancing the attack performance through a combination of diffusion priors and target knowledge. Additionally, we propose an improved max-margin loss that replaces the hard max with top-k maxes, fully leveraging feature information and soft labels from the target classifier. Extensive experiments demonstrate that Diff-MI significantly improves generative fidelity with an average decrease of 20% in FID while maintaining competitive attack accuracy compared to state-of-the-art methods across various datasets and models. Our code is available at: <a href="https://github.com/Ouxiang-Li/Diff-MI" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item536'>[536]</a> <a href ="/abs/2407.13891" title="Abstract" id="2407.13891"> arXiv:2407.13891 </a> (replaced) [<a href="/pdf/2407.13891" title="Download PDF" id="pdf-2407.13891" aria-labelledby="pdf-2407.13891">pdf</a>, <a href="/format/2407.13891" title="Other formats" id="oth-2407.13891" aria-labelledby="oth-2407.13891">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> High Risk of Political Bias in Black Box Emotion Inference Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Plisiecki,+H">Hubert Plisiecki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lenartowicz,+P">Paweł Lenartowicz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Flakus,+M">Maria Flakus</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pokropek,+A">Artur Pokropek</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> This paper investigates the presence of political bias in emotion inference models used for sentiment analysis (SA) in social science research. Machine learning models often reflect biases in their training data, impacting the validity of their outcomes. While previous research has highlighted gender and race biases, our study focuses on political bias - an underexplored yet pervasive issue that can skew the interpretation of text data across a wide array of studies. We conducted a bias audit on a Polish sentiment analysis model developed in our lab. By analyzing valence predictions for names and sentences involving Polish politicians, we uncovered systematic differences influenced by political affiliations. Our findings indicate that annotations by human raters propagate political biases into the model's predictions. 
To mitigate this, we pruned the training dataset of texts mentioning these politicians and observed a reduction in bias, though not its complete elimination. Given the significant implications of political bias in SA, our study emphasizes caution in employing these models for social science research. We recommend a critical examination of SA results and propose using lexicon-based systems as a more ideologically neutral alternative. This paper underscores the necessity for ongoing scrutiny and methodological adjustments to ensure the reliability and impartiality of the use of machine learning in academic and applied contexts. </p> </div> </dd> <dt> <a name='item537'>[537]</a> <a href ="/abs/2407.13979" title="Abstract" id="2407.13979"> arXiv:2407.13979 </a> (replaced) [<a href="/pdf/2407.13979" title="Download PDF" id="pdf-2407.13979" aria-labelledby="pdf-2407.13979">pdf</a>, <a href="https://arxiv.org/html/2407.13979v2" title="View HTML" id="html-2407.13979" aria-labelledby="html-2407.13979" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.13979" title="Other formats" id="oth-2407.13979" aria-labelledby="oth-2407.13979">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Truthfulness of Calibration Measures </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Haghtalab,+N">Nika Haghtalab</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qiao,+M">Mingda Qiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+K">Kunhe Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+E">Eric Zhao</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> To appear at NeurIPS 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Data Structures and Algorithms (cs.DS); Machine 
Learning (stat.ML) </div> <p class='mathjax'> We initiate the study of the truthfulness of calibration measures in sequential prediction. A calibration measure is said to be truthful if the forecaster (approximately) minimizes the expected penalty by predicting the conditional expectation of the next outcome, given the prior distribution of outcomes. Truthfulness is an important property of calibration measures, ensuring that the forecaster is not incentivized to exploit the system with deliberate poor forecasts. This makes it an essential desideratum for calibration measures, alongside typical requirements, such as soundness and completeness. <br>We conduct a taxonomy of existing calibration measures and their truthfulness. Perhaps surprisingly, we find that all of them are far from being truthful. That is, under existing calibration measures, there are simple distributions on which a polylogarithmic (or even zero) penalty is achievable, while truthful prediction leads to a polynomial penalty. Our main contribution is the introduction of a new calibration measure termed the Subsampled Smooth Calibration Error (SSCE) under which truthful prediction is optimal up to a constant multiplicative factor. 
</p> </div> </dd> <dt> <a name='item538'>[538]</a> <a href ="/abs/2407.15080" title="Abstract" id="2407.15080"> arXiv:2407.15080 </a> (replaced) [<a href="/pdf/2407.15080" title="Download PDF" id="pdf-2407.15080" aria-labelledby="pdf-2407.15080">pdf</a>, <a href="/format/2407.15080" title="Other formats" id="oth-2407.15080" aria-labelledby="oth-2407.15080">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SNIP: Speculative Execution and Non-Interference Preservation for Compiler Transformations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=van+der+Wall,+S">Sören van der Wall</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Meyer,+R">Roland Meyer</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Programming Languages (cs.PL)</span>; Cryptography and Security (cs.CR) </div> <p class='mathjax'> We address the problem of preserving non-interference across compiler transformations under speculative semantics. We develop a proof method that ensures the preservation uniformly across all source programs. The basis of our proof method is a new form of simulation relation. It operates over directives that model the attacker's control over the micro-architectural state, and it accounts for the fact that the compiler transformation may change the influence of the micro-architectural state on the execution (and hence the directives). Using our proof method, we show the correctness of dead code elimination. When we tried to prove register allocation correct, we identified a previously unknown weakness that introduces violations to non-interference. We have confirmed the weakness for a mainstream compiler on code from the libsodium cryptographic library. To reclaim security once more, we develop a novel static analysis that operates on a product of source program and register-allocated program. 
Using the analysis, we present an automated fix to existing register allocation implementations. We prove the correctness of the fixed register allocations with our proof method. </p> </div> </dd> <dt> <a name='item539'>[539]</a> <a href ="/abs/2407.17438" title="Abstract" id="2407.17438"> arXiv:2407.17438 </a> (replaced) [<a href="/pdf/2407.17438" title="Download PDF" id="pdf-2407.17438" aria-labelledby="pdf-2407.17438">pdf</a>, <a href="https://arxiv.org/html/2407.17438v3" title="View HTML" id="html-2407.17438" aria-labelledby="html-2407.17438" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.17438" title="Other formats" id="oth-2407.17438" aria-labelledby="oth-2407.17438">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> HumanVid: Demystifying Training Data for Camera-controllable Human Image Animation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zhenzhi Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yixuan Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+Y">Yanhong Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fang,+Y">Youqing Fang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+Y">Yuwei Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+W">Wenran Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tan,+J">Jing Tan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+K">Kai Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xue,+T">Tianfan Xue</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dai,+B">Bo Dai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+D">Dahua Lin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> NeurIPS D&B Track 2024 camera ready version, 
TL;DR: the first large-scale dataset for camera controllable human image animation task, and a baseline method </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Human image animation involves generating videos from a character photo, allowing user control and unlocking the potential for video and movie production. While recent approaches yield impressive results using high-quality training data, the inaccessibility of these datasets hampers fair and transparent benchmarking. Moreover, these approaches prioritize 2D human motion and overlook the significance of camera motions in videos, leading to limited control and unstable video generation. To demystify the training data, we present HumanVid, the first large-scale high-quality dataset tailored for human image animation, which combines crafted real-world and synthetic data. For the real-world data, we compile a vast collection of real-world videos from the internet. We developed and applied careful filtering rules to ensure video quality, resulting in a curated collection of 20K high-resolution (1080P) human-centric videos. Human and camera motion annotation is accomplished using a 2D pose estimator and a SLAM-based method. To expand our synthetic dataset, we collected 10K 3D avatar assets and leveraged existing assets of body shapes, skin textures and clothings. Notably, we introduce a rule-based camera trajectory generation method, enabling the synthetic pipeline to incorporate diverse and precise camera motion annotation, which can rarely be found in real-world data. To verify the effectiveness of HumanVid, we establish a baseline model named CamAnimate, short for Camera-controllable Human Animation, that considers both human and camera motions as conditions. 
Through extensive experimentation, we demonstrate that such simple baseline training on our HumanVid achieves state-of-the-art performance in controlling both human pose and camera motions, setting a new benchmark. Demo, data and code could be found in the project website: <a href="https://humanvid.github.io/" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item540'>[540]</a> <a href ="/abs/2407.19523" title="Abstract" id="2407.19523"> arXiv:2407.19523 </a> (replaced) [<a href="/pdf/2407.19523" title="Download PDF" id="pdf-2407.19523" aria-labelledby="pdf-2407.19523">pdf</a>, <a href="https://arxiv.org/html/2407.19523v3" title="View HTML" id="html-2407.19523" aria-labelledby="html-2407.19523" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.19523" title="Other formats" id="oth-2407.19523" aria-labelledby="oth-2407.19523">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Robust Fast Adaptation from Adversarially Explicit Task Distribution Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+C">Cheems Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lv,+Y">Yiqin Lv</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mao,+Y">Yixiu Mao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qu,+Y">Yun Qu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+Y">Yi Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ji,+X">Xiangyang Ji</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by KDD 2025. 
The project is available at <a href="https://sites.google.com/view/ar-metalearn" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Meta-learning is a practical learning paradigm to transfer skills across tasks from a few examples. Nevertheless, the existence of task distribution shifts tends to weaken meta-learners' generalization capability, particularly when the training task distribution is naively hand-crafted or based on simple priors that fail to cover critical scenarios sufficiently. Here, we consider explicitly generative modeling task distributions placed over task identifiers and propose robustifying fast adaptation from adversarial training. Our approach, which can be interpreted as a model of a Stackelberg game, not only uncovers the task structure during problem-solving from an explicit generative model but also theoretically increases the adaptation robustness in worst cases. This work has practical implications, particularly in dealing with task distribution shifts in meta-learning, and contributes to theoretical insights in the field. Our method demonstrates its robustness in the presence of task subpopulation shifts and improved performance over SOTA baselines in extensive experiments. The code will be available at the project site <a href="https://sites.google.com/view/ar-metalearn" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item541'>[541]</a> <a href ="/abs/2408.00754" title="Abstract" id="2408.00754"> arXiv:2408.00754 </a> (replaced) [<a href="/pdf/2408.00754" title="Download PDF" id="pdf-2408.00754" aria-labelledby="pdf-2408.00754">pdf</a>, <a href="https://arxiv.org/html/2408.00754v2" title="View HTML" id="html-2408.00754" aria-labelledby="html-2408.00754" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.00754" title="Other formats" id="oth-2408.00754" aria-labelledby="oth-2408.00754">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Coarse Correspondences Boost Spatial-Temporal Reasoning in Multimodal Language Model </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+B">Benlin Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dong,+Y">Yuhao Dong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yiqin Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+Z">Zixian Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+Y">Yansong Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+L">Luming Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rao,+Y">Yongming Rao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+W">Wei-Chiu Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Krishna,+R">Ranjay Krishna</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> project page: <a href="https://coarse-correspondence.github.io" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Multimodal 
language models (MLLMs) are increasingly being applied in real-world environments, necessitating their ability to interpret 3D spaces and comprehend temporal dynamics. Current methods often rely on specialized architectural designs or task-specific fine-tuning to achieve this. We introduce Coarse Correspondences, a simple lightweight method that enhances MLLMs' spatial-temporal reasoning with 2D images as input, without modifying the architecture or requiring task-specific fine-tuning. Our method uses a lightweight tracking model to identify primary object correspondences between frames in a video or across different image viewpoints, and then conveys this information to MLLMs through visual prompting. We demonstrate that this simple training-free approach brings substantial gains to GPT4-V/O consistently on four benchmarks that require spatial-temporal reasoning, including +20.5\% improvement on ScanQA, +9.7\% on OpenEQA's episodic memory subset, +6.0\% on the long-form video benchmark EgoSchema, and +11\% on the R2R navigation benchmark. Additionally, we show that Coarse Correspondences can also enhance open-source MLLMs' spatial reasoning (by +6.9\% on ScanQA) when applied in both training and inference and that the improvement can generalize to unseen datasets such as SQA3D (+3.1\%). Taken together, we show that Coarse Correspondences effectively and efficiently boosts models' performance on downstream tasks requiring spatial-temporal reasoning. 
</p> </div> </dd> <dt> <a name='item542'>[542]</a> <a href ="/abs/2408.02555" title="Abstract" id="2408.02555"> arXiv:2408.02555 </a> (replaced) [<a href="/pdf/2408.02555" title="Download PDF" id="pdf-2408.02555" aria-labelledby="pdf-2408.02555">pdf</a>, <a href="https://arxiv.org/html/2408.02555v2" title="View HTML" id="html-2408.02555" aria-labelledby="html-2408.02555" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.02555" title="Other formats" id="oth-2408.02555" aria-labelledby="oth-2408.02555">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MeshAnything V2: Artist-Created Mesh Generation With Adjacent Mesh Tokenization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yiwen Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yikai Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+Y">Yihao Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zhengyi Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Z">Zilong Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+J">Jun Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+C">Chi Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+G">Guosheng Lin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Project Page: <a href="https://buaacyw.github.io/meshanything-v2/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> Github: <a href="https://github.com/buaacyw/MeshAnythingV2" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial 
Intelligence (cs.AI); Graphics (cs.GR) </div> <p class='mathjax'> Meshes are the de facto 3D representation in the industry but are labor-intensive to produce. Recently, a line of research has focused on autoregressively generating meshes. This approach processes meshes into a sequence composed of vertices and then generates them vertex by vertex, similar to how a language model generates text. These methods have achieved some success but still struggle to generate complex meshes. One primary reason for this limitation is their inefficient tokenization methods. To address this issue, we introduce MeshAnything V2, an advanced mesh generation model designed to create Artist-Created Meshes that align precisely with specified shapes. A key innovation behind MeshAnything V2 is our novel Adjacent Mesh Tokenization (AMT) method. Unlike traditional approaches that represent each face using three vertices, AMT optimizes this by employing a single vertex wherever feasible, effectively reducing the token sequence length by about half on average. This not only streamlines the tokenization process but also results in more compact and well-structured sequences, enhancing the efficiency of mesh generation. With these improvements, MeshAnything V2 effectively doubles the face limit compared to previous models, delivering superior performance without increasing computational costs. We will make our code and models publicly available. 
Project Page: <a href="https://buaacyw.github.io/meshanything-v2/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item543'>[543]</a> <a href ="/abs/2408.02697" title="Abstract" id="2408.02697"> arXiv:2408.02697 </a> (replaced) [<a href="/pdf/2408.02697" title="Download PDF" id="pdf-2408.02697" aria-labelledby="pdf-2408.02697">pdf</a>, <a href="https://arxiv.org/html/2408.02697v3" title="View HTML" id="html-2408.02697" aria-labelledby="html-2408.02697" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.02697" title="Other formats" id="oth-2408.02697" aria-labelledby="oth-2408.02697">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Why Rectified Power Unit Networks Fail and How to Improve It: An Effective Theory Perspective </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+T">Taeyoung Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kang,+M">Myungjoo Kang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 41 pages, 17 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The Rectified Power Unit (RePU) activation functions, unlike the Rectified Linear Unit (ReLU), have the advantage of being a differentiable function when constructing neural networks. However, it can be experimentally observed when deep layers are stacked, neural networks constructed with RePU encounter critical issues. These issues include the values exploding or vanishing and failure of training. And these happen regardless of the hyperparameter initialization. 
From the perspective of effective theory, we aim to identify the causes of this phenomenon and propose a new activation function that retains the advantages of RePU while overcoming its drawbacks. </p> </div> </dd> <dt> <a name='item544'>[544]</a> <a href ="/abs/2408.03413" title="Abstract" id="2408.03413"> arXiv:2408.03413 </a> (replaced) [<a href="/pdf/2408.03413" title="Download PDF" id="pdf-2408.03413" aria-labelledby="pdf-2408.03413">pdf</a>, <a href="https://arxiv.org/html/2408.03413v2" title="View HTML" id="html-2408.03413" aria-labelledby="html-2408.03413" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.03413" title="Other formats" id="oth-2408.03413" aria-labelledby="oth-2408.03413">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A TVD neural network closure and application to turbulent combustion </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Suh,+S+W">Seung Won Suh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=MacArt,+J+F">Jonathan F MacArt</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Olson,+L+N">Luke N Olson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Freund,+J+B">Jonathan B Freund</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computational Engineering, Finance, and Science (cs.CE); Fluid Dynamics (physics.flu-dyn) </div> <p class='mathjax'> Trained neural networks (NN) have attractive features for closing governing equations. There are many methods that are showing promise, but all can fail in cases when small errors consequentially violate physical reality, such as a solution boundedness condition. A NN formulation is introduced to preclude spurious oscillations that violate solution boundedness or positivity. 
It is embedded in the discretized equations as a machine learning closure and strictly constrained, inspired by total variation diminishing (TVD) methods for hyperbolic conservation laws. The constraint is exactly enforced during gradient-descent training by rescaling the NN parameters, which maps them onto an explicit feasible set. Demonstrations show that the constrained NN closure model usefully recovers linear and nonlinear hyperbolic phenomena and anti-diffusion while enforcing the non-oscillatory property. Finally, the model is applied to subgrid-scale (SGS) modeling of a turbulent reacting flow, for which it suppresses spurious oscillations in scalar fields that otherwise violate the solution boundedness. It outperforms a simple penalization of oscillations in the loss function. </p> </div> </dd> <dt> <a name='item545'>[545]</a> <a href ="/abs/2408.04197" title="Abstract" id="2408.04197"> arXiv:2408.04197 </a> (replaced) [<a href="/pdf/2408.04197" title="Download PDF" id="pdf-2408.04197" aria-labelledby="pdf-2408.04197">pdf</a>, <a href="https://arxiv.org/html/2408.04197v2" title="View HTML" id="html-2408.04197" aria-labelledby="html-2408.04197" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.04197" title="Other formats" id="oth-2408.04197" aria-labelledby="oth-2408.04197">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Pairwise Judgment Formulation for Semantic Embedding Model in Web Search </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hong,+M">Mengze Hong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ng,+W">Wailing Ng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+Z">Zichang Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+C+J">Chen Jason Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span 
class="primary-subject">Information Retrieval (cs.IR)</span>; Artificial Intelligence (cs.AI); Databases (cs.DB) </div> <p class='mathjax'> Semantic Embedding Model (SEM), a neural network-based Siamese architecture, is gaining momentum in information retrieval and natural language processing. In order to train SEM in a supervised fashion for Web search, the search engine query log is typically utilized to automatically formulate pairwise judgments as training data. Despite the growing application of semantic embeddings in the search engine industry, little work has been done on formulating effective pairwise judgments for training SEM. In this paper, we make the first in-depth investigation of a wide range of strategies for generating pairwise judgments for SEM. An interesting (perhaps surprising) discovery reveals that the conventional pairwise judgment formulation strategy widely used in the field of pairwise Learning-to-Rank (LTR) is not necessarily effective for training SEM. Through a large-scale empirical study based on query logs and click-through activities from a major commercial search engine, we demonstrate the effective strategies for SEM and highlight the advantages of a hybrid heuristic (i.e., Clicked > Non-Clicked) in comparison to the atomic heuristics (e.g., Clicked > Skipped) in LTR. We conclude with best practices for training SEM and offer promising insights for future research. 
</p> </div> </dd> <dt> <a name='item546'>[546]</a> <a href ="/abs/2408.04498" title="Abstract" id="2408.04498"> arXiv:2408.04498 </a> (replaced) [<a href="/pdf/2408.04498" title="Download PDF" id="pdf-2408.04498" aria-labelledby="pdf-2408.04498">pdf</a>, <a href="https://arxiv.org/html/2408.04498v2" title="View HTML" id="html-2408.04498" aria-labelledby="html-2408.04498" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.04498" title="Other formats" id="oth-2408.04498" aria-labelledby="oth-2408.04498">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Model-Based Transfer Learning for Contextual Reinforcement Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cho,+J">Jung-Hoon Cho</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jayawardana,+V">Vindula Jayawardana</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+S">Sirui Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+C">Cathy Wu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Deep reinforcement learning (RL) is a powerful approach to complex decision making. However, one issue that limits its practical application is its brittleness, sometimes failing to train in the presence of small changes in the environment. Motivated by the success of zero-shot transfer—where pre-trained models perform well on related tasks—we consider the problem of selecting a good set of training tasks to maximize generalization performance across a range of tasks. Given the high cost of training, it is critical to select training tasks strategically, but it is not well understood how to do so. 
We hence introduce Model-Based Transfer Learning (MBTL), which layers on top of existing RL methods to effectively solve contextual RL problems. MBTL models the generalization performance in two parts: 1) the performance set point, modeled using Gaussian processes, and 2) performance loss (generalization gap), modeled as a linear function of contextual similarity. MBTL combines these two pieces of information within a Bayesian optimization (BO) framework to strategically select training tasks. We show theoretically that the method exhibits sublinear regret in the number of training tasks and discuss conditions to further tighten regret bounds. We experimentally validate our methods using urban traffic and standard continuous control benchmarks. The experimental results suggest that MBTL can achieve up to 50x improved sample efficiency compared with canonical independent training and multi-task training. Further experiments demonstrate the efficacy of BO and the insensitivity to the underlying RL algorithm and hyperparameters. This work lays the foundations for investigating explicit modeling of generalization, thereby enabling principled yet effective methods for contextual RL. 
</p> </div> </dd> <dt> <a name='item547'>[547]</a> <a href ="/abs/2408.10072" title="Abstract" id="2408.10072"> arXiv:2408.10072 </a> (replaced) [<a href="/pdf/2408.10072" title="Download PDF" id="pdf-2408.10072" aria-labelledby="pdf-2408.10072">pdf</a>, <a href="https://arxiv.org/html/2408.10072v2" title="View HTML" id="html-2408.10072" aria-labelledby="html-2408.10072" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.10072" title="Other formats" id="oth-2408.10072" aria-labelledby="oth-2408.10072">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FFAA: Multimodal Large Language Model based Explainable Open-World Face Forgery Analysis Assistant </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+Z">Zhengchao Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xia,+B">Bin Xia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Z">Zicheng Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mou,+Z">Zhun Mou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+W">Wenming Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jia,+J">Jiaya Jia</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 23 pages, 21 figures; project page: <a href="https://ffaa-vl.github.io" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The rapid advancement of deepfake technologies has sparked widespread public concern, particularly as face forgery poses a serious threat to public information security. 
However, the unknown and diverse forgery techniques, varied facial features and complex environmental factors pose significant challenges for face forgery analysis. Existing datasets lack descriptive annotations of these aspects, making it difficult for models to distinguish between real and forged faces using only visual information amid various confounding factors. In addition, existing methods fail to yield user-friendly and explainable results, hindering the understanding of the model's decision-making process. To address these challenges, we introduce a novel Open-World Face Forgery Analysis VQA (OW-FFA-VQA) task and its corresponding benchmark. To tackle this task, we first establish a dataset featuring a diverse collection of real and forged face images with essential descriptions and reliable forgery reasoning. Based on this dataset, we introduce FFAA: Face Forgery Analysis Assistant, consisting of a fine-tuned Multimodal Large Language Model (MLLM) and Multi-answer Intelligent Decision System (MIDS). By integrating hypothetical prompts with MIDS, the impact of fuzzy classification boundaries is effectively mitigated, enhancing model robustness. Extensive experiments demonstrate that our method not only provides user-friendly and explainable results but also significantly boosts accuracy and robustness compared to previous methods. 
</p> </div> </dd> <dt> <a name='item548'>[548]</a> <a href ="/abs/2408.11161" title="Abstract" id="2408.11161"> arXiv:2408.11161 </a> (replaced) [<a href="/pdf/2408.11161" title="Download PDF" id="pdf-2408.11161" aria-labelledby="pdf-2408.11161">pdf</a>, <a href="https://arxiv.org/html/2408.11161v4" title="View HTML" id="html-2408.11161" aria-labelledby="html-2408.11161" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.11161" title="Other formats" id="oth-2408.11161" aria-labelledby="oth-2408.11161">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On the Advice Complexity of Online Matching on the Line </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Csaba,+B">Béla Csaba</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nagy-Gy%C3%B6rgy,+J">Judit Nagy-György</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Data Structures and Algorithms (cs.DS)</span> </div> <p class='mathjax'> We consider the matching problem on the line with advice complexity. We give a 1-competitive online algorithm with advice complexity $n-1,$ and show that there is no 1-competitive online algorithm reading less than $n-1$ bits of advice. Moreover, for each $0&lt;k&lt;n$ we present a $c(n/k)$-competitive online algorithm with advice complexity $O(k(\log N + \log n))$ where $n$ is the number of servers, $N$ is the distance of the minimal and maximal servers, and $c(n)$ is the complexity of the best online algorithm without advice. 
</p> </div> </dd> <dt> <a name='item549'>[549]</a> <a href ="/abs/2408.14199" title="Abstract" id="2408.14199"> arXiv:2408.14199 </a> (replaced) [<a href="/pdf/2408.14199" title="Download PDF" id="pdf-2408.14199" aria-labelledby="pdf-2408.14199">pdf</a>, <a href="https://arxiv.org/html/2408.14199v2" title="View HTML" id="html-2408.14199" aria-labelledby="html-2408.14199" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.14199" title="Other formats" id="oth-2408.14199" aria-labelledby="oth-2408.14199">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Survey on Small-Scale Testbeds for Connected and Automated Vehicles and Robot Swarms </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Mokhtarian,+A">Armin Mokhtarian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+J">Jianye Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Scheffe,+P">Patrick Scheffe</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kloock,+M">Maximilian Kloock</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sch%C3%A4fer,+S">Simon Schäfer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bang,+H">Heeseung Bang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Le,+V">Viet-Anh Le</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ulhas,+S">Sangeet Ulhas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Betz,+J">Johannes Betz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wilson,+S">Sean Wilson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Berman,+S">Spring Berman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Paull,+L">Liam Paull</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Prorok,+A">Amanda Prorok</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Alrifaee,+B">Bassam Alrifaee</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages, 11 figures, 1 table. This work was accepted by the IEEE Robotics & Automation Magazine </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Multiagent Systems (cs.MA) </div> <p class='mathjax'> Connected and automated vehicles and robot swarms hold transformative potential for enhancing safety, efficiency, and sustainability in the transportation and manufacturing sectors. Extensive testing and validation of these technologies is crucial for their deployment in the real world. While simulations are essential for initial testing, they often have limitations in capturing the complex dynamics of real-world interactions. This limitation underscores the importance of small-scale testbeds. These testbeds provide a realistic, cost-effective, and controlled environment for testing and validating algorithms, acting as an essential intermediary between simulation and full-scale experiments. This work serves to facilitate researchers' efforts in identifying existing small-scale testbeds suitable for their experiments and provide insights for those who want to build their own. In addition, it delivers a comprehensive survey of the current landscape of these testbeds. We derive 62 characteristics of testbeds based on the well-known sense-plan-act paradigm and offer an online table comparing 23 small-scale testbeds based on these characteristics. The online table is hosted on our designated public webpage <a href="https://bassamlab.github.io/testbeds-survey" rel="external noopener nofollow" class="link-external link-https">this https URL</a>, and we invite testbed creators and developers to contribute to it. 
We closely examine nine testbeds in this paper, demonstrating how the derived characteristics can be used to present testbeds. Furthermore, we discuss three ongoing challenges concerning small-scale testbeds that we identified, i.e., small-scale to full-scale transition, sustainability, and power and resource management. </p> </div> </dd> <dt> <a name='item550'>[550]</a> <a href ="/abs/2408.14512" title="Abstract" id="2408.14512"> arXiv:2408.14512 </a> (replaced) [<a href="/pdf/2408.14512" title="Download PDF" id="pdf-2408.14512" aria-labelledby="pdf-2408.14512">pdf</a>, <a href="https://arxiv.org/html/2408.14512v2" title="View HTML" id="html-2408.14512" aria-labelledby="html-2408.14512" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.14512" title="Other formats" id="oth-2408.14512" aria-labelledby="oth-2408.14512">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> LLMs as Zero-shot Graph Learners: Alignment of GNN Representations with LLM Token Embeddings </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+D">Duo Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zuo,+Y">Yuan Zuo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+F">Fengzhi Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+J">Junjie Wu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL) </div> <p class='mathjax'> Zero-shot graph machine learning, especially with graph neural networks (GNNs), has garnered significant interest due to the challenge of scarce labeled data. 
While methods like self-supervised learning and graph prompt learning have been extensively explored, they often rely on fine-tuning with task-specific labels, limiting their effectiveness in zero-shot scenarios. Inspired by the zero-shot capabilities of instruction-fine-tuned large language models (LLMs), we introduce a novel framework named Token Embedding-Aligned Graph Language Model (TEA-GLM) that leverages LLMs as cross-dataset and cross-task zero-shot learners for graph machine learning. Concretely, we pretrain a GNN, aligning its representations with token embeddings of an LLM. We then train a linear projector that transforms the GNN's representations into a fixed number of graph token embeddings without tuning the LLM. A unified instruction is designed for various graph tasks at different levels, such as node classification (node-level) and link prediction (edge-level). These design choices collectively enhance our method's effectiveness in zero-shot learning, setting it apart from existing methods. Experiments show that our graph token embeddings help the LLM predictor achieve state-of-the-art performance on unseen datasets and tasks compared to other methods using LLMs as predictors. 
</p> </div> </dd> <dt> <a name='item551'>[551]</a> <a href ="/abs/2408.16266" title="Abstract" id="2408.16266"> arXiv:2408.16266 </a> (replaced) [<a href="/pdf/2408.16266" title="Download PDF" id="pdf-2408.16266" aria-labelledby="pdf-2408.16266">pdf</a>, <a href="https://arxiv.org/html/2408.16266v2" title="View HTML" id="html-2408.16266" aria-labelledby="html-2408.16266" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.16266" title="Other formats" id="oth-2408.16266" aria-labelledby="oth-2408.16266">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Inversion Circle Interpolation: Diffusion-based Image Augmentation for Data-scarce Classification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yanghao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+L">Long Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Data Augmentation (DA), i.e., synthesizing faithful and diverse samples to expand the original training set, is a prevalent and effective strategy to improve the performance of various data-scarce tasks. With the powerful image generation ability, diffusion-based DA has shown strong performance gains on different image classification benchmarks. In this paper, we analyze today's diffusion-based DA methods, and argue that they cannot take account of both faithfulness and diversity, which are two critical keys for generating high-quality samples and boosting classification performance. To this end, we propose a novel Diffusion-based DA method: Diff-II. Specifically, it consists of three steps: 1) Category concepts learning: Learning concept embeddings for each category. 
2) Inversion interpolation: Calculating the inversion for each image, and conducting circle interpolation for two randomly sampled inversions from the same category. 3) Two-stage denoising: Using different prompts to generate synthesized images in a coarse-to-fine manner. Extensive experiments on various data-scarce image classification tasks (e.g., few-shot, long-tailed, and out-of-distribution classification) have demonstrated its effectiveness over state-of-the-art diffusion-based DA methods. </p> </div> </dd> <dt> <a name='item552'>[552]</a> <a href ="/abs/2409.04022" title="Abstract" id="2409.04022"> arXiv:2409.04022 </a> (replaced) [<a href="/pdf/2409.04022" title="Download PDF" id="pdf-2409.04022" aria-labelledby="pdf-2409.04022">pdf</a>, <a href="https://arxiv.org/html/2409.04022v4" title="View HTML" id="html-2409.04022" aria-labelledby="html-2409.04022" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.04022" title="Other formats" id="oth-2409.04022" aria-labelledby="oth-2409.04022">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Heterogeneity-Aware Cooperative Federated Edge Learning with Adaptive Computation and Communication Compression </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Z">Zhenxiao Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+Z">Zhidong Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+Y">Yuanxiong Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gong,+Y">Yanmin Gong</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 20 pages, 8 figures, accepted by IEEE Transactions on Mobile Computing </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Distributed, Parallel, and Cluster Computing (cs.DC)</span>; Machine Learning (cs.LG) </div> <p 
class='mathjax'> Motivated by the drawbacks of cloud-based federated learning (FL), cooperative federated edge learning (CFEL) has been proposed to improve efficiency for FL over mobile edge networks, where multiple edge servers collaboratively coordinate the distributed model training across a large number of edge devices. However, CFEL faces critical challenges arising from dynamic and heterogeneous device properties, which slow down the convergence and increase resource consumption. This paper proposes a heterogeneity-aware CFEL scheme called \textit{Heterogeneity-Aware Cooperative Edge-based Federated Averaging} (HCEF) that aims to maximize the model accuracy while minimizing the training time and energy consumption via adaptive computation and communication compression in CFEL. By theoretically analyzing how local update frequency and gradient compression affect the convergence error bound in CFEL, we develop an efficient online control algorithm for HCEF to dynamically determine local update frequencies and compression ratios for heterogeneous devices. Experimental results show that compared with prior schemes, the proposed HCEF scheme can maintain higher model accuracy while reducing training latency and improving energy efficiency simultaneously. 
</p> </div> </dd> <dt> <a name='item553'>[553]</a> <a href ="/abs/2409.06091" title="Abstract" id="2409.06091"> arXiv:2409.06091 </a> (replaced) [<a href="/pdf/2409.06091" title="Download PDF" id="pdf-2409.06091" aria-labelledby="pdf-2409.06091">pdf</a>, <a href="https://arxiv.org/html/2409.06091v2" title="View HTML" id="html-2409.06091" aria-labelledby="html-2409.06091" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.06091" title="Other formats" id="oth-2409.06091" aria-labelledby="oth-2409.06091">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Scalable Multitask Learning Using Gradient-based Estimation of Task Affinity </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+D">Dongyue Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sharma,+A">Aneesh Sharma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+H+R">Hongyang R. Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages. Appeared in KDD 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Social and Information Networks (cs.SI); Machine Learning (stat.ML) </div> <p class='mathjax'> Multitask learning is a widely used paradigm for training models on diverse tasks, with applications ranging from graph neural networks to language model fine-tuning. Since tasks may interfere with each other, a key notion for modeling their relationships is task affinity. This includes pairwise task affinity, computed among pairs of tasks, and higher-order affinity, computed among subsets of tasks. Naively computing either of them requires repeatedly training on data from various task combinations, which is computationally intensive. 
We present a new algorithm Grad-TAG that can estimate task affinities without this repeated training. <br>The key idea of Grad-TAG is to train a "base" model for all tasks and then use a linearization technique to estimate the loss of the model for a specific task combination. The linearization works by computing a gradient-based approximation of the loss, using low-dimensional projections of gradients as features in a logistic regression to predict labels for the task combination. We show that the linearized model can provably approximate the loss when the gradient-based approximation is accurate, and also empirically verify that on several large models. Then, given the estimated task affinity, we design a semi-definite program for clustering similar tasks by maximizing the average density of clusters. <br>We evaluate Grad-TAG's performance across seven datasets, including multi-label classification on graphs, and instruction fine-tuning of language models. Our task affinity estimates are within 2.7% distance to the true affinities while needing only 3% of FLOPs in full training. On our largest graph with 21M edges and 500 labeling tasks, our algorithm delivers estimates within 5% distance to the true affinities, using only 112 GPU hours. Our results show that Grad-TAG achieves excellent performance and runtime tradeoffs compared to existing approaches. 
</p> </div> </dd> <dt> <a name='item554'>[554]</a> <a href ="/abs/2409.06098" title="Abstract" id="2409.06098"> arXiv:2409.06098 </a> (replaced) [<a href="/pdf/2409.06098" title="Download PDF" id="pdf-2409.06098" aria-labelledby="pdf-2409.06098">pdf</a>, <a href="/format/2409.06098" title="Other formats" id="oth-2409.06098" aria-labelledby="oth-2409.06098">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Positioning of a Next Generation Mobile Cell to Maximise Aggregate Network Capacity </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Correia,+P+F">Paulo Furtado Correia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Coelho,+A">Andre Coelho</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ricardo,+M">Manuel Ricardo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 15 pages, 8 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Networking and Internet Architecture (cs.NI)</span> </div> <p class='mathjax'> In wireless communications, the need to cover operation areas, such as seaports, is at the forefront of discussion, especially regarding network capacity provisioning. Radio network planning typically involves determining the number of fixed cells, considering link budgets and deploying them geometrically centered across targeted areas. This paper proposes a solution to determine the optimal position for a mobile cell, considering 3GPP path loss models. The optimal position for the mobile cell maximises the aggregate network capacity offered to a set of User Equipments (UEs), with gains up to 187% compared to the positioning of the mobile cell at the UEs' geometrical center. The proposed solution can be used by network planners and integrated into network optimisation tools. 
This has the potential to reduce costs associated with the Radio Access Network (RAN) planning by enhancing flexibility for on-demand deployments. </p> </div> </dd> <dt> <a name='item555'>[555]</a> <a href ="/abs/2409.07606" title="Abstract" id="2409.07606"> arXiv:2409.07606 </a> (replaced) [<a href="/pdf/2409.07606" title="Download PDF" id="pdf-2409.07606" aria-labelledby="pdf-2409.07606">pdf</a>, <a href="https://arxiv.org/html/2409.07606v3" title="View HTML" id="html-2409.07606" aria-labelledby="html-2409.07606" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.07606" title="Other formats" id="oth-2409.07606" aria-labelledby="oth-2409.07606">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The Role of Deep Learning Regularizations on Actors in Offline RL </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tarasov,+D">Denis Tarasov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Surina,+A">Anja Surina</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gulcehre,+C">Caglar Gulcehre</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> <a href="https://github.com/DT6A/ActoReg" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Deep learning regularization techniques, such as dropout, layer normalization, or weight decay, are widely adopted in the construction of modern artificial neural networks, often resulting in more robust training processes and improved generalization capabilities. 
However, in the domain of Reinforcement Learning (RL), the application of these techniques has been limited, usually applied to value function estimators (Hiraoka et al., 2021; Smith et al., 2022), and may result in detrimental effects. This issue is even more pronounced in offline RL settings, which bear greater similarity to supervised learning but have received less attention. Recent work in continuous offline RL (Park et al., 2024) has demonstrated that while we can build sufficiently powerful critic networks, the generalization of actor networks remains a bottleneck. In this study, we empirically show that applying standard regularization techniques to actor networks in offline RL actor-critic algorithms yields improvements of 6% on average across two algorithms and three different continuous D4RL domains. </p> </div> </dd> <dt> <a name='item556'>[556]</a> <a href ="/abs/2409.08435" title="Abstract" id="2409.08435"> arXiv:2409.08435 </a> (replaced) [<a href="/pdf/2409.08435" title="Download PDF" id="pdf-2409.08435" aria-labelledby="pdf-2409.08435">pdf</a>, <a href="https://arxiv.org/html/2409.08435v4" title="View HTML" id="html-2409.08435" aria-labelledby="html-2409.08435" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.08435" title="Other formats" id="oth-2409.08435" aria-labelledby="oth-2409.08435">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> When Context Leads but Parametric Memory Follows in Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tao,+Y">Yufei Tao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hiatt,+A">Adam Hiatt</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Haake,+E">Erik Haake</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jetter,+A+J">Antonie J. 
Jetter</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Agrawal,+A">Ameeta Agrawal</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by EMNLP 2024 Main Conference </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Large language models (LLMs) have demonstrated remarkable progress in leveraging diverse knowledge sources. This study investigates how nine widely used LLMs allocate knowledge between local context and global parameters when answering open-ended questions in knowledge-consistent scenarios. We introduce a novel dataset, WikiAtomic, and systematically vary context sizes to analyze how LLMs prioritize and utilize the provided information and their parametric knowledge in knowledge-consistent scenarios. Additionally, we also study their tendency to hallucinate under varying context sizes. Our findings reveal consistent patterns across models, including a consistent reliance on both contextual (around 70%) and parametric (around 30%) knowledge, and a decrease in hallucinations with increasing context. These insights highlight the importance of more effective context organization and developing models that use input more deterministically for robust performance. 
</p> </div> </dd> <dt> <a name='item557'>[557]</a> <a href ="/abs/2409.10024" title="Abstract" id="2409.10024"> arXiv:2409.10024 </a> (replaced) [<a href="/pdf/2409.10024" title="Download PDF" id="pdf-2409.10024" aria-labelledby="pdf-2409.10024">pdf</a>, <a href="/format/2409.10024" title="Other formats" id="oth-2409.10024" aria-labelledby="oth-2409.10024">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Highly dynamic physical interaction for robotics: design and control of an active remote center of compliance </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Friedrich,+C">Christian Friedrich</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Frank,+P">Patrick Frank</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Santin,+M">Marco Santin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Haag,+M">Matthias Haag</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7 pages, 7 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Robot interaction control is often limited to low dynamics or low flexibility, depending on whether an active or passive approach is chosen. In this work, we introduce a hybrid control scheme that combines the advantages of active and passive interaction control. To accomplish this, we propose the design of a novel Active Remote Center of Compliance (ARCC), which is based on a passive and active element which can be used to directly control the interaction forces. We introduce surrogate models for a dynamic comparison against purely robot-based interaction schemes. In a comparative validation, ARCC drastically improves the interaction dynamics, leading to an increase in the motion bandwidth of up to 31 times. 
We further introduce our control approach as well as its integration in the robot controller. Finally, we analyze ARCC on different industrial benchmarks like peg-in-hole, top-hat rail assembly and contour following problems and compare it against the state of the art, to highlight its dynamics and flexibility. The proposed system is especially suited if the application requires a low cycle time combined with sensitive manipulation. </p> </div> </dd> <dt> <a name='item558'>[558]</a> <a href ="/abs/2409.10549" title="Abstract" id="2409.10549"> arXiv:2409.10549 </a> (replaced) [<a href="/pdf/2409.10549" title="Download PDF" id="pdf-2409.10549" aria-labelledby="pdf-2409.10549">pdf</a>, <a href="https://arxiv.org/html/2409.10549v3" title="View HTML" id="html-2409.10549" aria-labelledby="html-2409.10549" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.10549" title="Other formats" id="oth-2409.10549" aria-labelledby="oth-2409.10549">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Confronting Conflicts to Yes: Untangling Wicked Problems with Open Design Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Teuber,+L">L.G. Teuber</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wolfert,+A">A.R.M. Wolfert</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computers and Society (cs.CY)</span>; Optimization and Control (math.OC) </div> <p class='mathjax'> Current project development practices often fail to engage stakeholders early and effectively. Decision support is often non-inclusive, single-sided, and lacking in transparency, while complexity goes beyond human comprehension. Additionally, many approaches focus primarily on technical system aspects, neglecting the integration of stakeholders' individual preferences. 
This often results in project impasses, leaving stakeholders unable to collaboratively achieve a "yes." There is a need for a purely associative, a-priori design approach that integrates system realities and stakeholder ideals within a joint socio-technical solution space. The state-of-the-art Preferendus, embedded in the proven Open Design Systems (Odesys) methodology, is a neutral tool for transforming complexity into success. Aiming for synthesis, Odesys' robust IMAP optimization method generates a single best-fit design solution. Here, Odesys is applied for a Dutch wind farm stalemate development, balancing multiple stakeholder preferences, wind farm performances, and project constraints. The success of this approach hinges on stakeholder trust and input. This article introduces a structured stakeholder assessment method using choice-based conjunctive analysis (CBCA), facilitating transparent determination of global and local stakeholder weights and preference functions. Modelling 'disputable' exogenous factors as endogenous design parameters, the application demonstrates how one can shift toward a collaborative "yes." For this, it is concluded that a zoomed-out solution space would enable the energy transition to be tackled with multiple options rather than a prescribed one. The Odesys approach fosters decision-making that aligns with the social threefold principles of freedom, equality, and fraternity, guiding projects toward genuine democratic outcomes rather than selecting from curated options. 
</p> </div> </dd> <dt> <a name='item559'>[559]</a> <a href ="/abs/2409.10929" title="Abstract" id="2409.10929"> arXiv:2409.10929 </a> (replaced) [<a href="/pdf/2409.10929" title="Download PDF" id="pdf-2409.10929" aria-labelledby="pdf-2409.10929">pdf</a>, <a href="/format/2409.10929" title="Other formats" id="oth-2409.10929" aria-labelledby="oth-2409.10929">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> An Enhanced Online Certificate Status Protocol for Public Key Infrastructure with Smart Grid and Energy Storage System </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+H">Hong-Sheng Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chuang,+C">Cheng-Che Chuang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shih,+J">Jhih-Zen Shih</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+H">Hsuan-Tung Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+H">Hung-Min Sun</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 12 pages, 13 figures, Cryptology and Information Security Conference 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span> </div> <p class='mathjax'> The efficiency of checking certificate status is one of the key indicators in the public key infrastructure (PKI). This prompted researchers to design the Online Certificate Status Protocol (OCSP) standard, defined in RFC 6960, to guide developers in implementing OCSP components. However, as the environment increasingly relies on PKI for identity authentication, it is essential to protect the communication between clients and servers from rogue elements. 
This can be achieved by using SSL/TLS techniques to establish a secure channel, allowing Certificate Authorities (CAs) to safely transfer certificate status information. In this work, we introduce the OCSP Stapling approach to optimize OCSP query costs in our smart grid environment. This approach reduces the number of queries from the Device Language Message Specification (DLMS) server to the OCSP server. Our experimental results show that OCSP stapling increases both efficiency and security, creating a more robust architecture for the smart grid. </p> </div> </dd> <dt> <a name='item560'>[560]</a> <a href ="/abs/2409.11257" title="Abstract" id="2409.11257"> arXiv:2409.11257 </a> (replaced) [<a href="/pdf/2409.11257" title="Download PDF" id="pdf-2409.11257" aria-labelledby="pdf-2409.11257">pdf</a>, <a href="https://arxiv.org/html/2409.11257v3" title="View HTML" id="html-2409.11257" aria-labelledby="html-2409.11257" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.11257" title="Other formats" id="oth-2409.11257" aria-labelledby="oth-2409.11257">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> To What Extent do Open-loop and Feedback Nash Equilibria Diverge in General-Sum Linear Quadratic Dynamic Games? 
</div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Chiu,+C">Chih-Yuan Chiu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+J">Jingqi Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Bhatt,+M">Maulik Bhatt</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mehr,+N">Negar Mehr</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> Dynamic games offer a versatile framework for modeling the evolving interactions of strategic agents, whose steady-state behavior can be captured by the Nash equilibria of the games. Nash equilibria are often computed in feedback, with policies depending on the state at each time, or in open-loop, with policies depending only on the initial state. Empirically, open-loop Nash equilibria (OLNE) could be more efficient to compute, while feedback Nash equilibria (FBNE) often encode more complex interactions. However, it remains unclear exactly which dynamic games yield FBNE and OLNE that differ significantly and which do not. To address this problem, we present a principled comparison study of OLNE and FBNE in linear quadratic (LQ) dynamic games. Specifically, we prove that the OLNE strategies of an LQ dynamic game can be synthesized by solving the coupled Riccati equations of an auxiliary LQ game with perturbed costs. The construction of the auxiliary game allows us to establish conditions under which OLNE and FBNE coincide and derive an upper bound on the deviation between FBNE and OLNE of an LQ game. 
</p> </div> </dd> <dt> <a name='item561'>[561]</a> <a href ="/abs/2409.11340" title="Abstract" id="2409.11340"> arXiv:2409.11340 </a> (replaced) [<a href="/pdf/2409.11340" title="Download PDF" id="pdf-2409.11340" aria-labelledby="pdf-2409.11340">pdf</a>, <a href="https://arxiv.org/html/2409.11340v2" title="View HTML" id="html-2409.11340" aria-labelledby="html-2409.11340" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.11340" title="Other formats" id="oth-2409.11340" aria-labelledby="oth-2409.11340">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> OmniGen: Unified Image Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xiao,+S">Shitao Xiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yueze Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+J">Junjie Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yuan,+H">Huaying Yuan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xing,+X">Xingrun Xing</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yan,+R">Ruiran Yan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+C">Chaofan Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+S">Shuting Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+T">Tiejun Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zheng Liu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Update the paper for OmniGen-v1 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The emergence of Large Language Models (LLMs) has unified language generation tasks and 
revolutionized human-machine interaction. However, in the realm of image generation, a unified model capable of handling various tasks within a single framework remains largely unexplored. In this work, we introduce OmniGen, a new diffusion model for unified image generation. OmniGen is characterized by the following features: 1) Unification: OmniGen not only demonstrates text-to-image generation capabilities but also inherently supports various downstream tasks, such as image editing, subject-driven generation, and visual-conditional generation. 2) Simplicity: The architecture of OmniGen is highly simplified, eliminating the need for additional plugins. Moreover, compared to existing diffusion models, it is more user-friendly and can complete complex tasks end-to-end through instructions without the need for extra intermediate steps, greatly simplifying the image generation workflow. 3) Knowledge Transfer: Benefiting from learning in a unified format, OmniGen effectively transfers knowledge across different tasks, manages unseen tasks and domains, and exhibits novel capabilities. We also explore the model's reasoning capabilities and potential applications of the chain-of-thought mechanism. This work represents the first attempt at a general-purpose image generation model, and we will release our resources at <a href="https://github.com/VectorSpaceLab/OmniGen" rel="external noopener nofollow" class="link-external link-https">this https URL</a> to foster future advancements. 
</p> </div> </dd> <dt> <a name='item562'>[562]</a> <a href ="/abs/2409.13343" title="Abstract" id="2409.13343"> arXiv:2409.13343 </a> (replaced) [<a href="/pdf/2409.13343" title="Download PDF" id="pdf-2409.13343" aria-labelledby="pdf-2409.13343">pdf</a>, <a href="https://arxiv.org/html/2409.13343v2" title="View HTML" id="html-2409.13343" aria-labelledby="html-2409.13343" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.13343" title="Other formats" id="oth-2409.13343" aria-labelledby="oth-2409.13343">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> "I Don't Use AI for Everything": Exploring Utility, Attitude, and Responsibility of AI-empowered Tools in Software Development </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+S">Shidong Pan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+L">Litian Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+T">Tianyi Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xing,+Z">Zhenchang Xing</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Y">Yanjie Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+Q">Qinghua Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+X">Xiaoyu Sun</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Compared to the previous version, we remove the MathJax format in the title, as the Google Scholar cannot correctly recognise it </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span>; Cryptography and Security (cs.CR) </div> <p class='mathjax'> AI-empowered tools have emerged as a transformative force, fundamentally reshaping the software development industry and promising far-reaching impacts across diverse 
sectors. This study investigates the adoption, impact, and security considerations of AI-empowered tools in the software development process. Through semi-structured interviews with 19 software practitioners from diverse backgrounds, we explore three key aspects: the utility of AI tools, developers' attitudes towards them, and security and privacy responsibilities. Our findings reveal widespread adoption of AI tools across various stages of software development. Developers generally express positive attitudes towards AI, viewing it as an efficiency-enhancing assistant rather than a job replacement threat. However, they also recognized limitations in AI's ability to handle complex, unfamiliar, or highly specialized tasks in software development. Regarding security and privacy, we found varying levels of risk awareness among developers, with larger companies implementing more comprehensive risk management strategies. Our study provides insights into the current state of AI adoption in software development and offers recommendations for practitioners, organizations, AI providers, and regulatory bodies to effectively navigate the integration of AI in the software industry. 
</p> </div> </dd> <dt> <a name='item563'>[563]</a> <a href ="/abs/2409.13503" title="Abstract" id="2409.13503"> arXiv:2409.13503 </a> (replaced) [<a href="/pdf/2409.13503" title="Download PDF" id="pdf-2409.13503" aria-labelledby="pdf-2409.13503">pdf</a>, <a href="https://arxiv.org/html/2409.13503v3" title="View HTML" id="html-2409.13503" aria-labelledby="html-2409.13503" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.13503" title="Other formats" id="oth-2409.13503" aria-labelledby="oth-2409.13503">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SatFed: A Resource-Efficient LEO Satellite-Assisted Heterogeneous Federated Learning Framework </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yuxin Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Z">Zheng Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Z">Zhe Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fang,+Z">Zihan Fang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+W">Wenjun Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+X">Xianhao Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+J">Jin Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+Y">Yue Gao</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages, 12 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Distributed, Parallel, and Cluster Computing (cs.DC)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Traditional federated learning (FL) frameworks rely heavily on terrestrial networks, where coverage limitations and increasing bandwidth congestion significantly hinder model convergence. 
Fortunately, the advancement of low-Earth orbit (LEO) satellite networks offers promising new communication avenues to augment traditional terrestrial FL. Despite this potential, the limited satellite-ground communication bandwidth and the heterogeneous operating environments of ground devices—including variations in data, bandwidth, and computing power—pose substantial challenges for effective and robust satellite-assisted FL. To address these challenges, we propose SatFed, a resource-efficient satellite-assisted heterogeneous FL framework. SatFed implements freshness-based model prioritization queues to optimize the use of highly constrained satellite-ground bandwidth, ensuring the transmission of the most critical models. Additionally, a multigraph is constructed to capture real-time heterogeneous relationships between devices, including data distribution, terrestrial bandwidth, and computing capability. This multigraph enables SatFed to aggregate satellite-transmitted models into peer guidance, enhancing local training in heterogeneous environments. Extensive experiments with real-world LEO satellite networks demonstrate that SatFed achieves superior performance and robustness compared to state-of-the-art benchmarks. 
</p> </div> </dd> <dt> <a name='item564'>[564]</a> <a href ="/abs/2409.13978" title="Abstract" id="2409.13978"> arXiv:2409.13978 </a> (replaced) [<a href="/pdf/2409.13978" title="Download PDF" id="pdf-2409.13978" aria-labelledby="pdf-2409.13978">pdf</a>, <a href="https://arxiv.org/html/2409.13978v3" title="View HTML" id="html-2409.13978" aria-labelledby="html-2409.13978" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.13978" title="Other formats" id="oth-2409.13978" aria-labelledby="oth-2409.13978">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FracGM: A Fast Fractional Programming Technique for Geman-McClure Robust Estimator </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+B">Bang-Shien Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Y">Yu-Kai Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jian-Yu Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+C">Chih-Wei Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chern,+J">Jann-Long Chern</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+C">Ching-Cherng Sun</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 6 figures </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> IEEE Robotics and Automation Letters, 9(12), 11666-11673, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Robotics (cs.RO); Optimization and Control (math.OC) </div> <p class='mathjax'> Robust estimation is essential in computer vision, robotics, and navigation, aiming to minimize the impact of outlier measurements for improved accuracy. 
We present a fast algorithm for Geman-McClure robust estimation, FracGM, leveraging fractional programming techniques. This solver reformulates the original non-convex fractional problem to a convex dual problem and a linear equation system, iteratively solving them in an alternating optimization pattern. Compared to graduated non-convexity approaches, this strategy exhibits a faster convergence rate and better outlier rejection capability. In addition, the global optimality of the proposed solver can be guaranteed under given conditions. We demonstrate the proposed FracGM solver with Wahba's rotation problem and 3-D point-cloud registration along with relaxation pre-processing and projection post-processing. Compared to state-of-the-art algorithms, when the outlier rates increase from 20% to 80%, FracGM shows 53% and 88% lower rotation and translation increases. In real-world scenarios, FracGM achieves better results in 13 out of 18 outcomes, while having a 19.43% improvement in the computation time. 
</p> </div> </dd> <dt> <a name='item565'>[565]</a> <a href ="/abs/2409.16098" title="Abstract" id="2409.16098"> arXiv:2409.16098 </a> (replaced) [<a href="/pdf/2409.16098" title="Download PDF" id="pdf-2409.16098" aria-labelledby="pdf-2409.16098">pdf</a>, <a href="/format/2409.16098" title="Other formats" id="oth-2409.16098" aria-labelledby="oth-2409.16098">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The Digital Transformation in Health: How AI Can Improve the Performance of Health Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Peri%C3%A1%C3%B1ez,+%C3%81">África Periáñez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=del+R%C3%ADo,+A+F">Ana Fernández del Río</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nazarov,+I">Ivan Nazarov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jan%C3%A9,+E">Enric Jané</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hassan,+M">Moiz Hassan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rastogi,+A">Aditya Rastogi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+D">Dexian Tang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This is an original manuscript of an article published by Taylor &amp; Francis in Health Systems &amp; Reform on 22 Oct 2024, available online: <a href="https://www.tandfonline.com/doi/10.1080/23288604.2024.2387138" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Health Systems &amp; Reform, 10(2), 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computers and Society (cs.CY); Human-Computer Interaction (cs.HC) 
</div> <p class='mathjax'> Mobile health has the potential to revolutionize health care delivery and patient engagement. In this work, we discuss how integrating Artificial Intelligence into digital health applications—focused on supply chain, patient management, and capacity building, among other use cases—can improve the health system and public health performance. We present an Artificial Intelligence and Reinforcement Learning platform that allows the delivery of adaptive interventions whose impact can be optimized through experimentation and real-time monitoring. The system can integrate multiple data sources and digital health applications. The flexibility of this platform to connect to various mobile health applications and digital devices and send personalized recommendations based on past data and predictions can significantly improve the impact of digital tools on health system outcomes. The potential for resource-poor settings, where the impact of this approach on health outcomes could be more decisive, is discussed specifically. This framework is, however, similarly applicable to improving efficiency in health systems where scarcity is not an issue. 
</p> </div> </dd> <dt> <a name='item566'>[566]</a> <a href ="/abs/2409.18862" title="Abstract" id="2409.18862"> arXiv:2409.18862 </a> (replaced) [<a href="/pdf/2409.18862" title="Download PDF" id="pdf-2409.18862" aria-labelledby="pdf-2409.18862">pdf</a>, <a href="https://arxiv.org/html/2409.18862v3" title="View HTML" id="html-2409.18862" aria-labelledby="html-2409.18862" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.18862" title="Other formats" id="oth-2409.18862" aria-labelledby="oth-2409.18862">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Safe Decentralized Multi-Agent Control using Black-Box Predictors, Conformal Decision Policies, and Control Barrier Functions </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Huriot,+S">Sacha Huriot</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Sibai,+H">Hussein Sibai</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 6 pages, 1 figure, submitted for ICRA 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span>; Multiagent Systems (cs.MA); Robotics (cs.RO) </div> <p class='mathjax'> We address the challenge of safe control in decentralized multi-agent robotic settings, where agents use uncertain black-box models to predict other agents' trajectories. We use the recently proposed conformal decision theory to adapt the restrictiveness of control barrier functions-based safety constraints based on observed prediction errors. We use these constraints to synthesize controllers that balance between the objectives of safety and task accomplishment, despite the prediction errors. 
We provide an upper bound on the average over time of the value of a monotonic function of the difference between the safety constraint based on the predicted trajectories and the constraint based on the ground truth ones. We validate our theory through experimental results showing the performance of our controllers when navigating a robot in the multi-agent scenes in the Stanford Drone Dataset. </p> </div> </dd> <dt> <a name='item567'>[567]</a> <a href ="/abs/2410.00392" title="Abstract" id="2410.00392"> arXiv:2410.00392 </a> (replaced) [<a href="/pdf/2410.00392" title="Download PDF" id="pdf-2410.00392" aria-labelledby="pdf-2410.00392">pdf</a>, <a href="https://arxiv.org/html/2410.00392v3" title="View HTML" id="html-2410.00392" aria-labelledby="html-2410.00392" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.00392" title="Other formats" id="oth-2410.00392" aria-labelledby="oth-2410.00392">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MERIT: Multimodal Wearable Vital Sign Waveform Monitoring </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Tang,+Y">Yongyang Tang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+Z">Zhe Chen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+A">Ang Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zheng,+T">Tianyue Zheng</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lin,+Z">Zheng Lin</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Xu,+J">Jia Xu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lv,+P">Pin Lv</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Sun,+Z">Zhe Sun</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Gao,+Y">Yue Gao</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 10 
figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span>; Hardware Architecture (cs.AR) </div> <p class='mathjax'> Cardiovascular disease (CVD) is the leading cause of death and premature mortality worldwide, with occupational environments significantly influencing CVD risk, underscoring the need for effective cardiac monitoring and early warning systems. Existing methods of monitoring vital signs require subjects to remain stationary, which is impractical for daily monitoring as individuals are often in motion. To address this limitation, we propose MERIT, a multimodality-based wearable system designed for precise ECG waveform monitoring without movement restrictions. Daily activities, involving frequent arm movements, can significantly affect sensor data and complicate the reconstruction of accurate ECG signals. To mitigate motion impact and enhance ECG signal reconstruction, we introduce a deep independent component analysis (Deep-ICA) module and a multimodal fusion module. We conducted experiments with 15 subjects. Our results, compared with commercial wearable devices and existing methods, demonstrate that MERIT accurately reconstructs ECG waveforms during various office activities, offering a reliable solution for fine-grained cardiac monitoring in dynamic environments. 
</p> </div> </dd> <dt> <a name='item568'>[568]</a> <a href ="/abs/2410.02068" title="Abstract" id="2410.02068"> arXiv:2410.02068 </a> (replaced) [<a href="/pdf/2410.02068" title="Download PDF" id="pdf-2410.02068" aria-labelledby="pdf-2410.02068">pdf</a>, <a href="https://arxiv.org/html/2410.02068v2" title="View HTML" id="html-2410.02068" aria-labelledby="html-2410.02068" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.02068" title="Other formats" id="oth-2410.02068" aria-labelledby="oth-2410.02068">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Fast and Sample Efficient Multi-Task Representation Learning in Stochastic Contextual Bandits </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+J">Jiabin Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Moothedath,+S">Shana Moothedath</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vaswani,+N">Namrata Vaswani</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> We study how representation learning can improve the learning efficiency of contextual bandit problems. We study the setting where we play T contextual linear bandits with dimension d simultaneously, and these T bandit tasks collectively share a common linear representation with a dimensionality of r much smaller than d. We present a new algorithm based on alternating projected gradient descent (GD) and minimization estimator to recover a low-rank feature matrix. Using the proposed estimator, we present a multi-task learning algorithm for linear contextual bandits and prove the regret bound of our algorithm. We presented experiments and compared the performance of our algorithm against benchmark algorithms. 
</p> </div> </dd> <dt> <a name='item569'>[569]</a> <a href ="/abs/2410.02592" title="Abstract" id="2410.02592"> arXiv:2410.02592 </a> (replaced) [<a href="/pdf/2410.02592" title="Download PDF" id="pdf-2410.02592" aria-labelledby="pdf-2410.02592">pdf</a>, <a href="https://arxiv.org/html/2410.02592v4" title="View HTML" id="html-2410.02592" aria-labelledby="html-2410.02592" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.02592" title="Other formats" id="oth-2410.02592" aria-labelledby="oth-2410.02592">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> IC3M: In-Car Multimodal Multi-object Monitoring for Abnormal Status of Both Driver and Passengers </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Fang,+Z">Zihan Fang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Z">Zheng Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+S">Senkang Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+H">Hangcheng Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+Y">Yiqin Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+X">Xianhao Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fang,+Y">Yuguang Fang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages, 17 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Systems and Control (eess.SY) </div> <p class='mathjax'> Recently, in-car monitoring has emerged as a promising technology for detecting early-stage abnormal status of the driver and providing timely alerts to prevent traffic accidents. 
Although training models with multimodal data enhances the reliability of abnormal status detection, the scarcity of labeled data and the imbalance of class distribution impede the extraction of critical abnormal state features, significantly deteriorating training performance. Furthermore, missing modalities due to environment and hardware limitations further exacerbate the challenge of abnormal status identification. More importantly, monitoring abnormal health conditions of passengers, particularly in elderly care, is of paramount importance but remains underexplored. To address these challenges, we introduce our IC3M, an efficient camera-rotation-based multimodal framework for monitoring both driver and passengers in a car. Our IC3M comprises two key modules: an adaptive threshold pseudo-labeling strategy and a missing modality reconstruction. The former customizes pseudo-labeling thresholds for different classes based on the class distribution, generating class-balanced pseudo labels to guide model training effectively, while the latter leverages crossmodality relationships learned from limited labels to accurately recover missing modalities by distribution transferring from available modalities. Extensive experimental results demonstrate that IC3M outperforms state-of-the-art benchmarks in accuracy, precision, and recall while exhibiting superior robustness under limited labeled data and severe missing modality. 
</p> </div> </dd> <dt> <a name='item570'>[570]</a> <a href ="/abs/2410.02884" title="Abstract" id="2410.02884"> arXiv:2410.02884 </a> (replaced) [<a href="/pdf/2410.02884" title="Download PDF" id="pdf-2410.02884" aria-labelledby="pdf-2410.02884">pdf</a>, <a href="https://arxiv.org/html/2410.02884v2" title="View HTML" id="html-2410.02884" aria-labelledby="html-2410.02884" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.02884" title="Other formats" id="oth-2410.02884" aria-labelledby="oth-2410.02884">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> LLaMA-Berry: Pairwise Optimization for O1-like Olympiad-Level Mathematical Reasoning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+D">Di Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+J">Jianbo Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lei,+J">Jingdi Lei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Che,+T">Tong Che</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+J">Jiatong Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+T">Tong Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+X">Xiaoshui Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+S">Shufei Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pavone,+M">Marco Pavone</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yuqiang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ouyang,+W">Wanli Ouyang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+D">Dongzhan Zhou</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> 
This paper presents an advanced mathematical problem-solving framework, LLaMA-Berry, for enhancing the mathematical reasoning ability of Large Language Models (LLMs). The framework combines Monte Carlo Tree Search (MCTS) with iterative Self-Refine to optimize the reasoning path and utilizes a pairwise reward model to evaluate different paths globally. By leveraging the self-critic and rewriting capabilities of LLMs, Self-Refine applied to MCTS (SR-MCTS) overcomes the inefficiencies and limitations of conventional step-wise and greedy search algorithms by fostering a more efficient exploration of solution spaces. Pairwise Preference Reward Model (PPRM), inspired by Reinforcement Learning from Human Feedback (RLHF), is then used to model pairwise preferences between solutions, utilizing an Enhanced Borda Count (EBC) method to synthesize these preferences into a global ranking score to find better answers. This approach addresses the challenges of scoring variability and non-independent distributions in mathematical reasoning tasks. The framework has been tested on general and advanced benchmarks, showing superior performance in terms of search efficiency and problem-solving capability compared to existing methods like ToT and rStar, particularly in complex Olympiad-level benchmarks, including GPQA, AIME24 and AMC23. 
</p> </div> </dd> <dt> <a name='item571'>[571]</a> <a href ="/abs/2410.04309" title="Abstract" id="2410.04309"> arXiv:2410.04309 </a> (replaced) [<a href="/pdf/2410.04309" title="Download PDF" id="pdf-2410.04309" aria-labelledby="pdf-2410.04309">pdf</a>, <a href="https://arxiv.org/html/2410.04309v2" title="View HTML" id="html-2410.04309" aria-labelledby="html-2410.04309" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.04309" title="Other formats" id="oth-2410.04309" aria-labelledby="oth-2410.04309">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Comprehensive Monitoring of Air Pollution Hotspots Using Sparse Sensor Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bhardwaj,+A">Ankit Bhardwaj</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Balashankar,+A">Ananth Balashankar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Iyer,+S">Shiva Iyer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Soans,+N">Nita Soans</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sudarshan,+A">Anant Sudarshan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pande,+R">Rohini Pande</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Subramanian,+L">Lakshminarayanan Subramanian</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computers and Society (cs.CY)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Urban air pollution hotspots pose significant health risks, yet their detection and analysis remain limited by the sparsity of public sensor networks. This paper addresses this challenge by combining predictive modeling and mechanistic approaches to comprehensively monitor pollution hotspots. 
We enhanced New Delhi's existing sensor network with 28 low-cost sensors, collecting PM2.5 data over 30 months from May 1, 2018, to Nov 1, 2020. Applying established definitions of hotspots to this data, we found the existence of an additional 189 hidden hotspots apart from confirming 660 hotspots detected by the public network. Using predictive techniques like Space-Time Kriging, we identified hidden hotspots with 95% precision and 88% recall with a 50% sensor failure rate, and with 98% precision and 95% recall with 50% missing sensors. The projected results of our predictive models were further compiled into policy recommendations for public authorities. Additionally, we developed a Gaussian Plume Dispersion Model to understand the mechanistic underpinnings of hotspot formation, incorporating an emissions inventory derived from local sources. Our mechanistic model is able to explain 65% of observed transient hotspots. Our findings underscore the importance of integrating data-driven predictive models with physics-based mechanistic models for scalable and robust air pollution management in resource-constrained settings. 
</p> </div> </dd> <dt> <a name='item572'>[572]</a> <a href ="/abs/2410.07753" title="Abstract" id="2410.07753"> arXiv:2410.07753 </a> (replaced) [<a href="/pdf/2410.07753" title="Download PDF" id="pdf-2410.07753" aria-labelledby="pdf-2410.07753">pdf</a>, <a href="https://arxiv.org/html/2410.07753v2" title="View HTML" id="html-2410.07753" aria-labelledby="html-2410.07753" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.07753" title="Other formats" id="oth-2410.07753" aria-labelledby="oth-2410.07753">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Data Augmentation for Surgical Scene Segmentation with Anatomy-Aware Diffusion Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Venkatesh,+D+K">Danush Kumar Venkatesh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rivoir,+D">Dominik Rivoir</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pfeiffer,+M">Micha Pfeiffer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kolbinger,+F">Fiona Kolbinger</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Speidel,+S">Stefanie Speidel</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at WACV 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> In computer-assisted surgery, automatically recognizing anatomical organs is crucial for understanding the surgical scene and providing intraoperative assistance. While machine learning models can identify such structures, their deployment is hindered by the need for labeled, diverse surgical datasets with anatomical annotations. Labeling multiple classes (i.e., organs) in a surgical scene is time-intensive, requiring medical experts. 
Although synthetically generated images can enhance segmentation performance, maintaining both organ structure and texture during generation is challenging. We introduce a multi-stage approach using diffusion models to generate multi-class surgical datasets with annotations. Our framework improves anatomy awareness by training organ-specific models with an inpainting objective guided by binary segmentation masks. The organs are generated with an inference pipeline using pre-trained ControlNet to maintain the organ structure. The synthetic multi-class datasets are constructed through an image composition step, ensuring structural and textural consistency. This versatile approach allows the generation of multi-class datasets from real binary datasets and simulated surgical masks. We thoroughly evaluate the generated datasets on image quality and downstream segmentation, achieving a $15\%$ improvement in segmentation scores when combined with real images. The code is available at <a href="https://gitlab.com/nct_tso_public/muli-class-image-synthesis" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item573'>[573]</a> <a href ="/abs/2410.08109" title="Abstract" id="2410.08109"> arXiv:2410.08109 </a> (replaced) [<a href="/pdf/2410.08109" title="Download PDF" id="pdf-2410.08109" aria-labelledby="pdf-2410.08109">pdf</a>, <a href="https://arxiv.org/html/2410.08109v2" title="View HTML" id="html-2410.08109" aria-labelledby="html-2410.08109" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.08109" title="Other formats" id="oth-2410.08109" aria-labelledby="oth-2410.08109">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Closer Look at Machine Unlearning for Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yuan,+X">Xiaojian Yuan</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Pang,+T">Tianyu Pang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Du,+C">Chao Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+K">Kejiang Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+W">Weiming Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+M">Min Lin</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Large language models (LLMs) may memorize sensitive or copyrighted content, raising privacy and legal concerns. Due to the high cost of retraining from scratch, researchers attempt to employ machine unlearning to remove specific content from LLMs while preserving the overall performance. In this paper, we discuss several issues in machine unlearning for LLMs and provide our insights on possible approaches. To address the issue of inadequate evaluation of model outputs after unlearning, we introduce three additional metrics to evaluate token diversity, sentence semantics, and factual correctness. We then categorize unlearning methods into untargeted and targeted, and discuss their issues respectively. Specifically, the behavior that untargeted unlearning attempts to approximate is unpredictable and may involve hallucinations, and existing regularization is insufficient for targeted unlearning. To alleviate these issues, we propose using the objective of maximizing entropy (ME) for untargeted unlearning and incorporate answer preservation (AP) loss as regularization for targeted unlearning. Experimental results across three scenarios, i.e., fictitious unlearning, continual unlearning, and real-world unlearning, demonstrate the effectiveness of our approaches. 
The code is available at <a href="https://github.com/sail-sg/closer-look-LLM-unlearning" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item574'>[574]</a> <a href ="/abs/2410.09747" title="Abstract" id="2410.09747"> arXiv:2410.09747 </a> (replaced) [<a href="/pdf/2410.09747" title="Download PDF" id="pdf-2410.09747" aria-labelledby="pdf-2410.09747">pdf</a>, <a href="https://arxiv.org/html/2410.09747v3" title="View HTML" id="html-2410.09747" aria-labelledby="html-2410.09747" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.09747" title="Other formats" id="oth-2410.09747" aria-labelledby="oth-2410.09747">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> t-READi: Transformer-Powered Robust and Efficient Multimodal Inference for Autonomous Driving </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+P">Pengfei Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qian,+Y">Yuhang Qian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+T">Tianyue Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+A">Ang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Z">Zhe Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+Y">Yue Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+X">Xiuzhen Cheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+J">Jun Luo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 14 pages, 16 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Distributed, Parallel, and Cluster Computing (cs.DC); Machine Learning 
(cs.LG); Robotics (cs.RO) </div> <p class='mathjax'> Given the wide adoption of multimodal sensors (e.g., camera, lidar, radar) by autonomous vehicles (AVs), deep analytics to fuse their outputs for a robust perception become imperative. However, existing fusion methods often make two assumptions rarely holding in practice: i) similar data distributions for all inputs and ii) constant availability for all sensors. Because, for example, lidars have various resolutions and failures of radars may occur, such variability often results in significant performance degradation in fusion. To this end, we present t-READi, an adaptive inference system that accommodates the variability of multimodal sensory data and thus enables robust and efficient perception. t-READi identifies variation-sensitive yet structure-specific model parameters; it then adapts only these parameters while keeping the rest intact. t-READi also leverages a cross-modality contrastive learning method to compensate for the loss from missing modalities. Both functions are implemented to maintain compatibility with existing multimodal deep fusion methods. The extensive experiments evidently demonstrate that compared with the status quo approaches, t-READi not only improves the average inference accuracy by more than 6% but also reduces the inference latency by almost 15x with the cost of only 5% extra memory overhead in the worst case under realistic data and modal variations. 
</p> </div> </dd> <dt> <a name='item575'>[575]</a> <a href ="/abs/2410.11112" title="Abstract" id="2410.11112"> arXiv:2410.11112 </a> (replaced) [<a href="/pdf/2410.11112" title="Download PDF" id="pdf-2410.11112" aria-labelledby="pdf-2410.11112">pdf</a>, <a href="https://arxiv.org/html/2410.11112v2" title="View HTML" id="html-2410.11112" aria-labelledby="html-2410.11112" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.11112" title="Other formats" id="oth-2410.11112" aria-labelledby="oth-2410.11112">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Differentiable Weightless Neural Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bacellar,+A+T+L">Alan T. L. Bacellar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Susskind,+Z">Zachary Susskind</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Breternitz,+M">Mauricio Breternitz Jr.</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=John,+E">Eugene John</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=John,+L+K">Lizy K. John</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lima,+P+M+V">Priscila M. V. Lima</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fran%C3%A7a,+F+M+G">Felipe M. G. França</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> International Conference on Machine Learning (ICML) 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> We introduce the Differentiable Weightless Neural Network (DWN), a model based on interconnected lookup tables. Training of DWNs is enabled by a novel Extended Finite Difference technique for approximate differentiation of binary values. 
We propose Learnable Mapping, Learnable Reduction, and Spectral Regularization to further improve the accuracy and efficiency of these models. We evaluate DWNs in three edge computing contexts: (1) an FPGA-based hardware accelerator, where they demonstrate superior latency, throughput, energy efficiency, and model area compared to state-of-the-art solutions, (2) a low-power microcontroller, where they achieve preferable accuracy to XGBoost while subject to stringent memory constraints, and (3) ultra-low-cost chips, where they consistently outperform small models in both accuracy and projected hardware area. DWNs also compare favorably against leading approaches for tabular datasets, with higher average rank. Overall, our work positions DWNs as a pioneering solution for edge-compatible high-throughput neural networks. </p> </div> </dd> <dt> <a name='item576'>[576]</a> <a href ="/abs/2410.11340" title="Abstract" id="2410.11340"> arXiv:2410.11340 </a> (replaced) [<a href="/pdf/2410.11340" title="Download PDF" id="pdf-2410.11340" aria-labelledby="pdf-2410.11340">pdf</a>, <a href="https://arxiv.org/html/2410.11340v2" title="View HTML" id="html-2410.11340" aria-labelledby="html-2410.11340" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.11340" title="Other formats" id="oth-2410.11340" aria-labelledby="oth-2410.11340">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Toward a Well-Calibrated Discrimination via Survival Outcome-Aware Contrastive Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+D">Dongjoon Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Park,+H">Hyeryn Park</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+C">Changhee Lee</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at NeurIPS 2024 </div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Previous deep learning approaches for survival analysis have primarily relied on ranking losses to improve discrimination performance, which often comes at the expense of calibration performance. To address such an issue, we propose a novel contrastive learning approach specifically designed to enhance discrimination \textit{without} sacrificing calibration. Our method employs weighted sampling within a contrastive learning framework, assigning lower penalties to samples with similar survival outcomes. This aligns well with the assumption that patients with similar event times share similar clinical statuses. Consequently, when augmented with the commonly used negative log-likelihood loss, our approach significantly improves discrimination performance without directly manipulating the model outputs, thereby achieving better calibration. Experiments on multiple real-world clinical datasets demonstrate that our method outperforms state-of-the-art deep survival models in both discrimination and calibration. Through comprehensive ablation studies, we further validate the effectiveness of our approach through quantitative and qualitative analyses. 
</p> </div> </dd> <dt> <a name='item577'>[577]</a> <a href ="/abs/2410.13714" title="Abstract" id="2410.13714"> arXiv:2410.13714 </a> (replaced) [<a href="/pdf/2410.13714" title="Download PDF" id="pdf-2410.13714" aria-labelledby="pdf-2410.13714">pdf</a>, <a href="https://arxiv.org/html/2410.13714v4" title="View HTML" id="html-2410.13714" aria-labelledby="html-2410.13714" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.13714" title="Other formats" id="oth-2410.13714" aria-labelledby="oth-2410.13714">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Generation through the lens of learning theory </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+J">Jiaxun Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Raman,+V">Vinod Raman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tewari,+A">Ambuj Tewari</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 28 pages, 2 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> We study generation through the lens of statistical learning theory. First, we abstract and formalize the results of Gold [1967], Angluin [1979], Angluin [1980] and Kleinberg and Mullainathan [2024] in terms of a binary hypothesis class defined over an abstract example space. Then, we extend the notion of "generation" from Kleinberg and Mullainathan [2024] to two new settings, we call "uniform" and "non-uniform" generation, and provide a characterization of which hypothesis classes are uniformly and non-uniformly generatable. As is standard in learning theory, our characterizations are in terms of the finiteness of a new combinatorial dimension termed the Closure dimension. 
By doing so, we are able to compare generatability with predictability (captured via PAC and online learnability) and show that these two properties of hypothesis classes are incompatible -- there are classes that are generatable but not predictable and vice versa. Finally, we extend our results to capture prompted generation and give a complete characterization of which classes are prompt generatable, generalizing some of the work by Kleinberg and Mullainathan [2024]. </p> </div> </dd> <dt> <a name='item578'>[578]</a> <a href ="/abs/2410.14107" title="Abstract" id="2410.14107"> arXiv:2410.14107 </a> (replaced) [<a href="/pdf/2410.14107" title="Download PDF" id="pdf-2410.14107" aria-labelledby="pdf-2410.14107">pdf</a>, <a href="/format/2410.14107" title="Other formats" id="oth-2410.14107" aria-labelledby="oth-2410.14107">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Transfer Learning on Transformers for Building Energy Consumption Forecasting -- A Comparative Study </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Spencer,+R">Robert Spencer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ranathunga,+S">Surangika Ranathunga</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Boulic,+M">Mikael Boulic</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=van+Heerden,+A">Andries van Heerden</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Susnjak,+T">Teo Susnjak</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> This study investigates the application of Transfer Learning (TL) on Transformer architectures to enhance building energy consumption forecasting. 
Transformers are a relatively new deep learning architecture, which has served as the foundation for groundbreaking technologies such as ChatGPT. While TL has been studied in the past, prior studies considered either one data-centric TL strategy or used older deep learning models such as Recurrent Neural Networks or Convolutional Neural Networks. Here, we carry out an extensive empirical study on six different data-centric TL strategies and analyse their performance under varying feature spaces. In addition to the vanilla Transformer architecture, we also experiment with Informer and PatchTST, specifically designed for time series forecasting. We use 16 datasets from the Building Data Genome Project 2 to create building energy consumption forecasting models. Experimental results reveal that while TL is generally beneficial, especially when the target domain has no data, careful selection of the exact TL strategy should be made to gain the maximum benefit. This decision largely depends on the feature space properties such as the recorded weather features. We also note that PatchTST outperforms the other two Transformer variants (vanilla Transformer and Informer). Our findings advance the building energy consumption forecasting using advanced approaches like TL and Transformer architectures. </p> </div> </dd> <dt> <a name='item579'>[579]</a> <a href ="/abs/2410.14729" title="Abstract" id="2410.14729"> arXiv:2410.14729 </a> (replaced) [<a href="/pdf/2410.14729" title="Download PDF" id="pdf-2410.14729" aria-labelledby="pdf-2410.14729">pdf</a>, <a href="https://arxiv.org/html/2410.14729v2" title="View HTML" id="html-2410.14729" aria-labelledby="html-2410.14729" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.14729" title="Other formats" id="oth-2410.14729" aria-labelledby="oth-2410.14729">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Is Less More? 
Exploring Token Condensation as Training-free Adaptation for CLIP </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zixin Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gong,+D">Dong Gong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+S">Sen Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+Z">Zi Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+Y">Yadan Luo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 15 pages, 7 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Machine Learning (cs.LG) </div> <p class='mathjax'> Contrastive language-image pre-training (CLIP) has shown remarkable generalization ability in image classification. However, CLIP sometimes encounters performance drops on downstream datasets during zero-shot inference. Test-time adaptation methods attempt to mitigate this by adjusting normalization layers or tuning context prompts with large batch sizes and extensive augmentations; yet, these methods are computationally intensive. This raises an important question: Is there a training-free approach that can efficiently address CLIP's performance drop in such cases? To explore this, we benchmark token condensation techniques, originally designed to enhance the efficiency of vision transformers, on CLIP zero-shot inference tasks. We observe that although token condensation may compromise in-domain accuracy, it surprisingly enhances CLIP's performance on certain cross-dataset benchmarks. This motivates two key inquiries: (1) Can token condensation serve as a "free-lunch" solution for CLIP zero-shot inference? 
(2) What criteria should guide condensation -- how can essential tokens be identified and redundant ones eliminated? To address these questions, we propose Token Condensation as Adaptation (TCA), a training-free adaptation method for CLIP by pruning class-irrelevant visual tokens while merging class-ambiguous tokens. As the first approach for CLIP's token efficiency, TCA demonstrates superior performance across cross-dataset tasks, achieving up to a 21.4\% improvement over the strongest baseline while reducing GFLOPs by 12.2\% to 48.9\%, with minimized hyperparameter dependency. </p> </div> </dd> <dt> <a name='item580'>[580]</a> <a href ="/abs/2410.16162" title="Abstract" id="2410.16162"> arXiv:2410.16162 </a> (replaced) [<a href="/pdf/2410.16162" title="Download PDF" id="pdf-2410.16162" aria-labelledby="pdf-2410.16162">pdf</a>, <a href="/format/2410.16162" title="Other formats" id="oth-2410.16162" aria-labelledby="oth-2410.16162">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sparkle: Mastering Basic Spatial Capabilities in Vision Language Models Elicits Generalization to Composite Spatial Reasoning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+Y">Yihong Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qu,+A">Ao Qu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zhaokai Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhuang,+D">Dingyi Zhuang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Z">Zhaofeng Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+W">Wei Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+S">Shenhao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+Y">Yunhan Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Z">Zhan Zhao</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+J">Jinhua Zhao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> Vision language models (VLMs) have demonstrated impressive performance across a wide range of downstream tasks. However, their proficiency in spatial reasoning remains limited, despite its crucial role in tasks involving navigation and interaction with physical environments. Specifically, most of these tasks rely on the core spatial reasoning capabilities in two-dimensional (2D) environments, and our evaluation reveals that state-of-the-art VLMs frequently generate implausible and incorrect responses to composite spatial reasoning problems, including simple pathfinding tasks that humans can solve effortlessly at a glance. To address this, we explore an effective approach to enhance 2D spatial reasoning within VLMs by training the model solely on basic spatial capabilities. We begin by disentangling the key components of 2D spatial reasoning: direction comprehension, distance estimation, and localization. Our central hypothesis is that mastering these basic spatial capabilities can significantly enhance a model's performance on composite spatial tasks requiring advanced spatial understanding and combinatorial problem-solving, with generalized improvements in visual-spatial tasks. To investigate this hypothesis, we introduce Sparkle, a framework that fine-tunes VLMs on these three basic spatial capabilities by synthetic data generation and targeted supervision to form an instruction dataset for each capability. Our experiments demonstrate that VLMs fine-tuned with Sparkle achieve significant performance gains, not only in the basic tasks themselves but also in generalizing to composite and out-of-distribution spatial reasoning tasks. 
These findings underscore the effectiveness of mastering basic spatial capabilities in enhancing composite spatial problem-solving, offering insights into systematic strategies for improving VLMs' spatial reasoning capabilities. </p> </div> </dd> <dt> <a name='item581'>[581]</a> <a href ="/abs/2410.16520" title="Abstract" id="2410.16520"> arXiv:2410.16520 </a> (replaced) [<a href="/pdf/2410.16520" title="Download PDF" id="pdf-2410.16520" aria-labelledby="pdf-2410.16520">pdf</a>, <a href="https://arxiv.org/html/2410.16520v2" title="View HTML" id="html-2410.16520" aria-labelledby="html-2410.16520" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.16520" title="Other formats" id="oth-2410.16520" aria-labelledby="oth-2410.16520">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> AUTALIC: A Dataset for Anti-AUTistic Ableist Language In Context </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Rizvi,+N">Naba Rizvi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Strickland,+H">Harper Strickland</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gitelman,+D">Daniel Gitelman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cooper,+T">Tristan Cooper</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Morales-Flores,+A">Alexis Morales-Flores</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Golden,+M">Michael Golden</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kallepalli,+A">Aekta Kallepalli</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Alurkar,+A">Akshat Alurkar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Owens,+H">Haaset Owens</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ahmedi,+S">Saleha Ahmedi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Khirwadkar,+I">Isha 
Khirwadkar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Munyaka,+I">Imani Munyaka</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ousidhoum,+N">Nedjma Ousidhoum</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages, 5 figures, 7 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> As our understanding of autism and ableism continues to increase, so does our understanding of ableist language towards autistic people. Such language poses a significant challenge in NLP research due to its subtle and context-dependent nature. Yet, detecting anti-autistic ableist language remains underexplored, with existing NLP tools often failing to capture its nuanced expressions. We present AUTALIC, the first benchmark dataset dedicated to the detection of anti-autistic ableist language in context, addressing a significant gap in the field. The dataset comprises 2,400 autism-related sentences collected from Reddit, accompanied by surrounding context, and is annotated by trained experts with backgrounds in neurodiversity. Our comprehensive evaluation reveals that current language models, including state-of-the-art LLMs, struggle to reliably identify anti-autistic ableism and align with human judgments, underscoring their limitations in this domain. We publicly release AUTALIC along with the individual annotations which serve as a valuable resource to researchers working on ableism, neurodiversity, and also studying disagreements in annotation tasks. This dataset serves as a crucial step towards developing more inclusive and context-aware NLP systems that better reflect diverse perspectives. 
</p> </div> </dd> <dt> <a name='item582'>[582]</a> <a href ="/abs/2410.17754" title="Abstract" id="2410.17754"> arXiv:2410.17754 </a> (replaced) [<a href="/pdf/2410.17754" title="Download PDF" id="pdf-2410.17754" aria-labelledby="pdf-2410.17754">pdf</a>, <a href="https://arxiv.org/html/2410.17754v2" title="View HTML" id="html-2410.17754" aria-labelledby="html-2410.17754" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.17754" title="Other formats" id="oth-2410.17754" aria-labelledby="oth-2410.17754">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Puncturing Quantum Stabilizer Codes </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gundersen,+J+S">Jaron Skovsted Gundersen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Christensen,+R+B">René Bødker Christensen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Grassl,+M">Markus Grassl</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Popovski,+P">Petar Popovski</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wisniewski,+R">Rafał Wisniewski</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Preprint </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span>; Rings and Algebras (math.RA) </div> <p class='mathjax'> Classical coding theory contains several techniques to obtain new codes from other codes, including puncturing and shortening. For quantum codes, a form of puncturing is known, but its description is based on the code space rather than its generators. In this work, we generalize the puncturing procedure to allow more freedom in the choice of which coded states are kept and which are removed. We describe this puncturing by focusing on the stabilizer matrix containing the generators of the code. 
In this way, we are able to explicitly describe the stabilizer matrix of the punctured code given the stabilizer matrix of the original stabilizer code. The additional freedom in the procedure also opens up new ways to construct new codes from old, and we present several ways to utilize this for the search of codes with good or even optimal parameters. In particular, we use the construction to obtain codes whose parameters exceed the best previously known. Lastly, we generalize the proof of the Griesmer bound from the classical setting to stabilizer codes since the proof relies heavily on the puncturing technique. </p> </div> </dd> <dt> <a name='item583'>[583]</a> <a href ="/abs/2410.18097" title="Abstract" id="2410.18097"> arXiv:2410.18097 </a> (replaced) [<a href="/pdf/2410.18097" title="Download PDF" id="pdf-2410.18097" aria-labelledby="pdf-2410.18097">pdf</a>, <a href="https://arxiv.org/html/2410.18097v3" title="View HTML" id="html-2410.18097" aria-labelledby="html-2410.18097" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.18097" title="Other formats" id="oth-2410.18097" aria-labelledby="oth-2410.18097">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> RRADistill: Distilling LLMs' Passage Ranking Ability for Long-Tail Queries Document Re-Ranking on a Search Engine </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Choi,+N">Nayoung Choi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+Y">Youngjune Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cho,+G">Gyu-Hwung Cho</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jeong,+H">Haeyu Jeong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kong,+J">Jungmin Kong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+S">Saehun Kim</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Park,+K">Keunchan Park</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cho,+S">Sarah Cho</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jeong,+I">Inchang Jeong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nam,+G">Gyohee Nam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Han,+S">Sunghoon Han</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+W">Wonil Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Choi,+J">Jaeho Choi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to EMNLP 2024 Industry Track. First two authors contributed equally </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Retrieval (cs.IR)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Large Language Models (LLMs) excel at understanding the semantic relationships between queries and documents, even with lengthy and complex long-tail queries. These queries are challenging for feedback-based rankings due to sparse user engagement and limited feedback, making LLMs' ranking ability highly valuable. However, the large size and slow inference of LLMs necessitate the development of smaller, more efficient models (sLLMs). Recently, integrating ranking label generation into distillation techniques has become crucial, but existing methods underutilize LLMs' capabilities and are cumbersome. Our research, RRADistill: Re-Ranking Ability Distillation, propose an efficient label generation pipeline and novel sLLM training methods for both encoder and decoder models. We introduce an encoder-based method using a Term Control Layer to capture term matching signals and a decoder-based model with a ranking layer for enhanced understanding. 
A/B testing on a Korean-based search platform, validates the effectiveness of our approach in improving re-ranking for long-tail queries. </p> </div> </dd> <dt> <a name='item584'>[584]</a> <a href ="/abs/2410.21271" title="Abstract" id="2410.21271"> arXiv:2410.21271 </a> (replaced) [<a href="/pdf/2410.21271" title="Download PDF" id="pdf-2410.21271" aria-labelledby="pdf-2410.21271">pdf</a>, <a href="https://arxiv.org/html/2410.21271v2" title="View HTML" id="html-2410.21271" aria-labelledby="html-2410.21271" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.21271" title="Other formats" id="oth-2410.21271" aria-labelledby="oth-2410.21271">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> EoRA: Training-free Compensation for Compressed LLM with Eigenspace Low-Rank Approximation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+S">Shih-Yang Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+H">Huck Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+C">Chien-Yi Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fung,+N+C">Nai Chit Fung</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+H">Hongxu Yin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sakr,+C">Charbel Sakr</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Muralidharan,+S">Saurav Muralidharan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+K">Kwang-Ting Cheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kautz,+J">Jan Kautz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y+F">Yu-Chiang Frank Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Molchanov,+P">Pavlo Molchanov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+M">Min-Hung Chen</a></div> 
<div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> In this work, we re-formulate the model compression problem into the customized compensation problem: Given a compressed model, we aim to introduce residual low-rank paths to compensate for compression errors under customized requirements from users (e.g., tasks, compression ratios), resulting in greater flexibility in adjusting overall capacity without being constrained by specific compression formats. However, naively applying SVD to derive residual paths causes suboptimal utilization of the low-rank representation capacity. Instead, we propose Training-free Eigenspace Low-Rank Approximation (EoRA), a method that directly minimizes compression-induced errors without requiring gradient-based training, achieving fast optimization in minutes using a small amount of calibration data. EoRA projects compression errors into the eigenspace of input activations, leveraging eigenvalues to effectively prioritize the reconstruction of high-importance error components. Moreover, EoRA can be seamlessly integrated with fine-tuning and quantization to further improve effectiveness and efficiency. EoRA consistently outperforms previous methods in compensating errors for compressed LLaMA2/3 models on various tasks, such as language generation, commonsense reasoning, and math reasoning tasks (e.g., 31.31%/12.88% and 9.69% improvements on ARC-Easy/ARC-Challenge and MathQA when compensating LLaMA3-8B that is quantized to 4-bit and pruned to 2:4 sparsity). EoRA offers a scalable, training-free solution to compensate for compression errors, making it a powerful tool to deploy LLMs in various capacity and efficiency requirements. 
</p> </div> </dd> <dt> <a name='item585'>[585]</a> <a href ="/abs/2410.22100" title="Abstract" id="2410.22100"> arXiv:2410.22100 </a> (replaced) [<a href="/pdf/2410.22100" title="Download PDF" id="pdf-2410.22100" aria-labelledby="pdf-2410.22100">pdf</a>, <a href="https://arxiv.org/html/2410.22100v2" title="View HTML" id="html-2410.22100" aria-labelledby="html-2410.22100" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.22100" title="Other formats" id="oth-2410.22100" aria-labelledby="oth-2410.22100">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MStableChain: Towards Multi-Native Stablecoins in EVM-Compatible Blockchain for Stable Fee and Mass Adoption </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+M">Mingzhe Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+B">Bo Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Toyoda,+K">Kentaroh Toyoda</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Y">Yechao Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Samsudin,+J">Juniarto Samsudin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+H">Haibin Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+S">Sifei Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tng,+T+H">Tai Hou Tng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Choo,+K">Kerching Choo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ting,+A">Andy Ting</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Goh,+S+M+R">Siow Mong Rick Goh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+Q">Qingsong Wei</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> In submission to IEEE TSC </div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Computational Engineering, Finance, and Science (cs.CE)</span> </div> <p class='mathjax'> Traditional blockchain systems, such as Ethereum, typically rely on a \emph{single volatile cryptocurrency for transaction fees}. This leads to fluctuating transaction fee prices and limits the flexibility of users' payment options. To address these issues, we propose MStableChain, which leverages multiple stablecoins as native tokens for transaction fee settlements, thus ensuring stable transaction fees and flexible payment options. To address the challenges of mass adoption and practicality, we propose several core designs. To maintain compatibility with the Ethereum Virtual Machine (EVM) for mass adoption while supporting multiple native stablecoins, MStableChain employs a multi-currency units, multi-type RPCs mechanism. This mechanism enables the system to handle multiple stablecoins without altering the EVM or requiring changes to user applications. Furthermore, an oracle-based gas fee adjustment mechanism is proposed to manage exchange rates between different stablecoins, ensuring equitable transaction costs across various currencies. The system also introduces a secure, on-chain voting-based management protocol for the administrative functions related to these stablecoins. Experimental results from a prototype implementation demonstrate that MStableChain provides stable transaction fee prices, high effectiveness, and good usability. 
</p> </div> </dd> <dt> <a name='item586'>[586]</a> <a href ="/abs/2410.22649" title="Abstract" id="2410.22649"> arXiv:2410.22649 </a> (replaced) [<a href="/pdf/2410.22649" title="Download PDF" id="pdf-2410.22649" aria-labelledby="pdf-2410.22649">pdf</a>, <a href="https://arxiv.org/html/2410.22649v2" title="View HTML" id="html-2410.22649" aria-labelledby="html-2410.22649" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.22649" title="Other formats" id="oth-2410.22649" aria-labelledby="oth-2410.22649">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> WaveRoRA: Wavelet Rotary Route Attention for Multivariate Time Series Forecasting </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+A">Aobo Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+Y">Yan Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guizani,+N">Nadra Guizani</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Model architecture changed </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> In recent years, Transformer-based models (Transformers) have achieved significant success in multivariate time series forecasting (MTSF). However, previous works focus on extracting features either from the time domain or the frequency domain, which inadequately captures the trends and periodic characteristics. To address this issue, we propose a wavelet learning framework to model complex temporal dependencies of the time series data. The wavelet domain integrates both time and frequency information, allowing for the analysis of local characteristics of signals at different scales. 
Additionally, the Softmax self-attention mechanism used by Transformers has quadratic complexity, which leads to excessive computational costs when capturing long-term dependencies. Therefore, we propose a novel attention mechanism: Rotary Route Attention (RoRA). Unlike Softmax attention, RoRA utilizes rotary position embeddings to inject relative positional information to sequence tokens and introduces a small number of routing tokens $r$ to aggregate information from the $KV$ matrices and redistribute it to the $Q$ matrix, offering linear complexity. We further propose WaveRoRA, which leverages RoRA to capture inter-series dependencies in the wavelet domain. We conduct extensive experiments on eight real-world datasets. The results indicate that WaveRoRA outperforms existing state-of-the-art models while maintaining lower computational costs. Our code is available at <a href="https://github.com/Leopold2333/WaveRoRA" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item587'>[587]</a> <a href ="/abs/2410.23279" title="Abstract" id="2410.23279"> arXiv:2410.23279 </a> (replaced) [<a href="/pdf/2410.23279" title="Download PDF" id="pdf-2410.23279" aria-labelledby="pdf-2410.23279">pdf</a>, <a href="https://arxiv.org/html/2410.23279v3" title="View HTML" id="html-2410.23279" aria-labelledby="html-2410.23279" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.23279" title="Other formats" id="oth-2410.23279" aria-labelledby="oth-2410.23279">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Transformer Model for Segmentation, Classification, and Caller Identification of Marmoset Vocalization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+B">Bin Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Takamichi,+S">Shinnosuke Takamichi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sakti,+S">Sakriani Sakti</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nakamura,+S">Satoshi Nakamura</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> Marmoset, a highly vocalized primate, has become a popular animal model for studying social-communicative behavior and its underlying mechanism comparing with human infant linguistic developments. In the study of vocal communication, it is vital to know the caller identities, call contents, and vocal exchanges. Previous work of a CNN has achieved a joint model for call segmentation, classification, and caller identification for marmoset vocalizations. 
However, the CNN has limitations in modeling long-range acoustic patterns; the Transformer architecture that has been shown to outperform CNNs, utilizes the self-attention mechanism that efficiently segregates information parallelly over long distances and captures the global structure of marmoset vocalization. We propose using the Transformer to jointly segment and classify the marmoset calls and identify the callers for each vocalization. </p> </div> </dd> <dt> <a name='item588'>[588]</a> <a href ="/abs/2410.23773" title="Abstract" id="2410.23773"> arXiv:2410.23773 </a> (replaced) [<a href="/pdf/2410.23773" title="Download PDF" id="pdf-2410.23773" aria-labelledby="pdf-2410.23773">pdf</a>, <a href="/format/2410.23773" title="Other formats" id="oth-2410.23773" aria-labelledby="oth-2410.23773">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Generative Ray Path Sampling for Faster Point-to-Point Ray Tracing </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Eertmans,+J">Jérome Eertmans</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Di+Cicco,+N">Nicola Di Cicco</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Oestges,+C">Claude Oestges</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jacques,+L">Laurent Jacques</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vittuci,+E+M">Enrico M. 
Vittuci</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Degli-Esposti,+V">Vittorio Degli-Esposti</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 6 pages, 6 figures, submitted to IEEE ICMLCN 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> Radio propagation modeling is essential in telecommunication research, as radio channels result from complex interactions with environmental objects. Recently, Machine Learning has been attracting attention as a potential alternative to computationally demanding tools, like Ray Tracing, which can model these interactions in detail. However, existing Machine Learning approaches often attempt to learn directly specific channel characteristics, such as the coverage map, making them highly specific to the frequency and material properties and unable to fully capture the underlying propagation mechanisms. Hence, Ray Tracing, particularly the Point-to-Point variant, remains popular to accurately identify all possible paths between transmitter and receiver nodes. Still, path identification is computationally intensive because the number of paths to be tested grows exponentially while only a small fraction is valid. In this paper, we propose a Machine Learning-aided Ray Tracing approach to efficiently sample potential ray paths, significantly reducing the computational load while maintaining high accuracy. Our model dynamically learns to prioritize potentially valid paths among all possible paths and scales linearly with scene complexity. Unlike recent alternatives, our approach is invariant with translation, scaling, or rotation of the geometry, and avoids dependency on specific environment characteristics. 
</p> </div> </dd> <dt> <a name='item589'>[589]</a> <a href ="/abs/2410.23792" title="Abstract" id="2410.23792"> arXiv:2410.23792 </a> (replaced) [<a href="/pdf/2410.23792" title="Download PDF" id="pdf-2410.23792" aria-labelledby="pdf-2410.23792">pdf</a>, <a href="/format/2410.23792" title="Other formats" id="oth-2410.23792" aria-labelledby="oth-2410.23792">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Bibliometrics effects of a new item-by-item classification system based on reference reclassification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pena-Rocha,+M">Marcos Pena-Rocha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gomez-Crisostomo,+M+R">Maria Rocio Gomez-Crisostomo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guerrero-Bote,+V+P">Vicente Pablo Guerrero-Bote</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=de+Moya-Anegon,+F">Felix de Moya-Anegon</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Digital Libraries (cs.DL)</span> </div> <p class='mathjax'> This study presents a comparative analysis between two scientific document classification systems. The first system employs the Scopus journal-based assignment method, adapted to a fractional model, while the second system uses an item-by-item system based on reclassified references according to the origin of the citers. The study's results are divided into three different sections: the first involves comparisons at the Scopus area level, the second examines comparisons at the category level, and the third tests various bibliometric indicators to identify the variations between the two systems. 
Highlighting the characteristics of the paper level system, it offers a reduction in the number of categories to which each document is assigned, achieving higher values of single-category assignment compared to the All Science Journal Classification (ASJC). When reclassifying areas and categories, the paper level system tends to accentuate differences at the extreme values, increasing the size of the largest categories and reducing that of the smallest ones. Moreover, the paper-by-paper system provides more homogeneous distributions in normalised impacts and adjusts values related to excellence more uniformly. </p> </div> </dd> <dt> <a name='item590'>[590]</a> <a href ="/abs/2410.24079" title="Abstract" id="2410.24079"> arXiv:2410.24079 </a> (replaced) [<a href="/pdf/2410.24079" title="Download PDF" id="pdf-2410.24079" aria-labelledby="pdf-2410.24079">pdf</a>, <a href="https://arxiv.org/html/2410.24079v2" title="View HTML" id="html-2410.24079" aria-labelledby="html-2410.24079" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.24079" title="Other formats" id="oth-2410.24079" aria-labelledby="oth-2410.24079">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hamiltonian Monte Carlo Inference of Marginalized Linear Mixed-Effects Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lai,+J">Jinlin Lai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Domke,+J">Justin Domke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sheldon,+D">Daniel Sheldon</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 38th Conference on Neural Information Processing Systems (NeurIPS 2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> Bayesian 
reasoning in linear mixed-effects models (LMMs) is challenging and often requires advanced sampling techniques like Markov chain Monte Carlo (MCMC). A common approach is to write the model in a probabilistic programming language and then sample via Hamiltonian Monte Carlo (HMC). However, there are many ways a user can transform a model that make inference more or less efficient. In particular, marginalizing some variables can greatly improve inference but is difficult for users to do manually. We develop an algorithm to easily marginalize random effects in LMMs. A naive approach introduces cubic time operations within an inference algorithm like HMC, but we reduce the running time to linear using fast linear algebra techniques. We show that marginalization is always beneficial when applicable and highlight improvements in various models, especially ones from cognitive sciences. </p> </div> </dd> <dt> <a name='item591'>[591]</a> <a href ="/abs/2410.24160" title="Abstract" id="2410.24160"> arXiv:2410.24160 </a> (replaced) [<a href="/pdf/2410.24160" title="Download PDF" id="pdf-2410.24160" aria-labelledby="pdf-2410.24160">pdf</a>, <a href="https://arxiv.org/html/2410.24160v2" title="View HTML" id="html-2410.24160" aria-labelledby="html-2410.24160" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.24160" title="Other formats" id="oth-2410.24160" aria-labelledby="oth-2410.24160">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Redefining &lt;Creative&gt; in Dictionary: Towards an Enhanced Semantic Understanding of Creative Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Feng,+F">Fu Feng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+Y">Yucheng Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+X">Xu Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+J">Jing 
Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Geng,+X">Xin Geng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> ``Creative'' remains an inherently abstract concept for both humans and diffusion models. While text-to-image (T2I) diffusion models can easily generate out-of-domain concepts like ``a blue banana'', they struggle with generating combinatorial objects such as ``a creative mixture that resembles a lettuce and a mantis'', due to difficulties in understanding the semantic depth of ``creative''. Current methods rely heavily on synthesizing reference prompts or images to achieve a creative effect, typically requiring retraining for each unique creative output -- a process that is computationally intensive and limits practical applications. To address this, we introduce CreTok, which brings meta-creativity to diffusion models by redefining ``creative'' as a new token, \texttt{&lt;CreTok&gt;}, thus enhancing models' semantic understanding for combinatorial creativity. CreTok achieves such redefinition by iteratively sampling diverse text pairs from our proposed CangJie dataset to form adaptive prompts and restrictive prompts, and then optimizing the similarity between their respective text embeddings. Extensive experiments demonstrate that \texttt{&lt;CreTok&gt;} enables the universal and direct generation of combinatorial creativity across diverse concepts without additional training (4s vs. BASS's 2400s per image), achieving state-of-the-art performance with improved text-image alignment ($\uparrow$0.03 in VQAScore) and higher human preference ratings ($\uparrow$0.009 in PickScore and $\uparrow$0.169 in ImageReward). Further evaluations with GPT-4o and user studies underscore CreTok's strengths in advancing creative generation. 
</p> </div> </dd> <dt> <a name='item592'>[592]</a> <a href ="/abs/2411.00262" title="Abstract" id="2411.00262"> arXiv:2411.00262 </a> (replaced) [<a href="/pdf/2411.00262" title="Download PDF" id="pdf-2411.00262" aria-labelledby="pdf-2411.00262">pdf</a>, <a href="https://arxiv.org/html/2411.00262v2" title="View HTML" id="html-2411.00262" aria-labelledby="html-2411.00262" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.00262" title="Other formats" id="oth-2411.00262" aria-labelledby="oth-2411.00262">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Content Aware Analysis of Scholarly Networks: A Case Study on CORD19 Dataset </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Akbulut,+M+E">Mehmet Emre Akbulut</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nacar,+Y+E">Yusuf Erdem Nacar</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Social and Information Networks (cs.SI)</span>; Information Retrieval (cs.IR); Physics and Society (physics.soc-ph) </div> <p class='mathjax'> This paper investigates the relationships among key elements of the scientific research network, namely articles, researchers, and journals. We introduce a novel approach to use semantic information through the HITS algorithm-based propagation of topic information in the network. The topic information is derived by using the Named Entity Recognition and Entity Linkage. In our case, MedCAT is used to extract the topics from the CORD19 Dataset, which is a corpus of academic articles about COVID-19 and the coronavirus scientific network. Our approach focuses on the COVID-19 domain, utilizing the CORD-19 dataset to demonstrate the efficacy of integrating topic-related information within the citation framework. 
Through the application of a hybrid HITS algorithm, we show that incorporating topic data significantly influences article rankings, revealing deeper insights into the structure of the academic community. </p> </div> </dd> <dt> <a name='item593'>[593]</a> <a href ="/abs/2411.00656" title="Abstract" id="2411.00656"> arXiv:2411.00656 </a> (replaced) [<a href="/pdf/2411.00656" title="Download PDF" id="pdf-2411.00656" aria-labelledby="pdf-2411.00656">pdf</a>, <a href="https://arxiv.org/html/2411.00656v2" title="View HTML" id="html-2411.00656" aria-labelledby="html-2411.00656" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.00656" title="Other formats" id="oth-2411.00656" aria-labelledby="oth-2411.00656">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Identification of Analytic Nonlinear Dynamical Systems with Non-asymptotic Guarantees </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Musavi,+N">Negin Musavi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Guo,+Z">Ziyao Guo</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Dullerud,+G">Geir Dullerud</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+Y">Yingying Li</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> NeurIPS 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> This paper focuses on the system identification of an important class of nonlinear systems: linearly parameterized nonlinear systems, which enjoys wide applications in robotics and other mechanical systems. 
We consider two system identification methods: least-squares estimation (LSE), which is a point estimation method; and set-membership estimation (SME), which estimates an uncertainty set that contains the true parameters. We provide non-asymptotic convergence rates for LSE and SME under i.i.d. control inputs and control policies with i.i.d. random perturbations, both of which are considered as non-active-exploration inputs. Compared with the counter-example based on piecewise-affine systems in the literature, the success of non-active exploration in our setting relies on a key assumption on the system dynamics: we require the system functions to be real-analytic. Our results, together with the piecewise-affine counter-example, reveal the importance of differentiability in nonlinear system identification through non-active exploration. Lastly, we numerically compare our theoretical bounds with the empirical performance of LSE and SME on a pendulum example and a quadrotor example. </p> </div> </dd> <dt> <a name='item594'>[594]</a> <a href ="/abs/2411.00774" title="Abstract" id="2411.00774"> arXiv:2411.00774 </a> (replaced) [<a href="/pdf/2411.00774" title="Download PDF" id="pdf-2411.00774" aria-labelledby="pdf-2411.00774">pdf</a>, <a href="https://arxiv.org/html/2411.00774v3" title="View HTML" id="html-2411.00774" aria-labelledby="html-2411.00774" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.00774" title="Other formats" id="oth-2411.00774" aria-labelledby="oth-2411.00774">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Freeze-Omni: A Smart and Low Latency Speech-to-speech Dialogue Model with Frozen LLM </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+X">Xiong Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yangze Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fu,+C">Chaoyou 
Fu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shen,+Y">Yunhang Shen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+L">Lei Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+K">Ke Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+X">Xing Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+L">Long Ma</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Project Page: <a href="https://freeze-omni.github.io/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> Rapidly developing large language models (LLMs) have brought tremendous intelligent applications. Especially, the GPT-4o's excellent duplex speech interaction ability has brought impressive experience to users. Researchers have recently proposed several multi-modal LLMs in this direction that can achieve user-agent speech-to-speech conversations. This paper proposes a novel speech-text multimodal LLM architecture called Freeze-Omni. Our main contribution is that the speech input and output modalities can be easily connected to a textual LLM while keeping the LLM's parameters frozen throughout the training process. We design a three-stage training strategy for modeling both the speech input and output, enabling Freeze-Omni to obtain speech-to-speech conversation ability using text-speech paired data (such as ASR and TTS data) and only 60,000 multi-round text Q&amp;A data on 8 GPUs. 
Moreover, we can effectively ensure that the intelligence of the Freeze-Omni in the speech modality is at the same level compared with that in the text modality of its backbone LLM, while achieving low latency end-to-end spoken response. In addition, we also designed a method to achieve duplex dialogue ability through multi-task training, giving Freeze-Omni a more natural style of dialogue ability between users and agents. In summary, Freeze-Omni holds great potential to conduct speech-to-speech dialogue based on a multimodal LLM under the condition of a frozen LLM, avoiding the catastrophic forgetting problem caused by limited data and training resources. </p> </div> </dd> <dt> <a name='item595'>[595]</a> <a href ="/abs/2411.01940" title="Abstract" id="2411.01940"> arXiv:2411.01940 </a> (replaced) [<a href="/pdf/2411.01940" title="Download PDF" id="pdf-2411.01940" aria-labelledby="pdf-2411.01940">pdf</a>, <a href="/format/2411.01940" title="Other formats" id="oth-2411.01940" aria-labelledby="oth-2411.01940">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Systematic Mapping Study on Requirements Engineering for Regulatory Compliance of Software Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kosenkov,+O">Oleksandr Kosenkov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Elahidoost,+P">Parisa Elahidoost</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gorschek,+T">Tony Gorschek</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fischbach,+J">Jannik Fischbach</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mendez,+D">Daniel Mendez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Unterkalmsteiner,+M">Michael Unterkalmsteiner</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fucci,+D">Davide Fucci</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Mohanani,+R">Rahul Mohanani</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to "Information and Software Technology" Journal </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span>; Computers and Society (cs.CY) </div> <p class='mathjax'> Context: As the diversity and complexity of regulations affecting Software-Intensive Products and Services (SIPS) is increasing, software engineers need to address the growing regulatory scrutiny. As with any other non-negotiable requirements, SIPS compliance should be addressed early in SIPS engineering - i.e., during requirements engineering (RE). Objectives: In the conditions of the expanding regulatory landscape, existing research offers scattered insights into regulatory compliance of SIPS. This study addresses the pressing need for a structured overview of the state of the art in software RE and its contribution to regulatory compliance of SIPS. Method: We conducted a systematic mapping study to provide an overview of the current state of research regarding challenges, principles and practices for regulatory compliance of SIPS related to RE. We focused on the role of RE and its contribution to other SIPS lifecycle phases. We retrieved 6914 studies published from 2017 until 2023 from four academic databases, which we filtered down to 280 relevant primary studies. Results: We identified and categorized the RE-related challenges in regulatory compliance of SIPS and their potential connection to six types of principles and practices. We found that about 13.6% of the primary studies considered the involvement of both software engineers and legal experts. About 20.7% of primary studies considered RE in connection to other process areas. Most primary studies focused on a few popular regulation fields and application domains. 
Our results suggest that there can be differences in terms of challenges and involvement of stakeholders across different fields of regulation. Conclusion: Our findings highlight the need for an in-depth investigation of stakeholders' roles, relationships between process areas, and specific challenges for distinct regulatory fields to guide research and practice. </p> </div> </dd> <dt> <a name='item596'>[596]</a> <a href ="/abs/2411.02193" title="Abstract" id="2411.02193"> arXiv:2411.02193 </a> (replaced) [<a href="/pdf/2411.02193" title="Download PDF" id="pdf-2411.02193" aria-labelledby="pdf-2411.02193">pdf</a>, <a href="https://arxiv.org/html/2411.02193v2" title="View HTML" id="html-2411.02193" aria-labelledby="html-2411.02193" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.02193" title="Other formats" id="oth-2411.02193" aria-labelledby="oth-2411.02193">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Improving Steering Vectors by Targeting Sparse Autoencoder Features </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chalnev,+S">Sviatoslav Chalnev</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Siu,+M">Matthew Siu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Conmy,+A">Arthur Conmy</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 maintext pages and 9 appendix pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL) </div> <p class='mathjax'> To control the behavior of language models, steering methods attempt to ensure that outputs of the model satisfy specific pre-defined properties. 
Adding steering vectors to the model is a promising method of model control that is easier than finetuning, and may be more robust than prompting. However, it can be difficult to anticipate the effects of steering vectors produced by methods such as CAA [Panickssery et al., 2024] or the direct use of SAE latents [Templeton et al., 2024]. In our work, we address this issue by using SAEs to measure the effects of steering vectors, giving us a method that can be used to understand the causal effect of any steering vector intervention. We use this method for measuring causal effects to develop an improved steering method, SAE-Targeted Steering (SAE-TS), which finds steering vectors to target specific SAE features while minimizing unintended side effects. We show that overall, SAE-TS balances steering effects with coherence better than CAA and SAE feature steering, when evaluated on a range of tasks. </p> </div> </dd> <dt> <a name='item597'>[597]</a> <a href ="/abs/2411.02306" title="Abstract" id="2411.02306"> arXiv:2411.02306 </a> (replaced) [<a href="/pdf/2411.02306" title="Download PDF" id="pdf-2411.02306" aria-labelledby="pdf-2411.02306">pdf</a>, <a href="/format/2411.02306" title="Other formats" id="oth-2411.02306" aria-labelledby="oth-2411.02306">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On Targeted Manipulation and Deception when Optimizing LLMs for User Feedback </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Williams,+M">Marcus Williams</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Carroll,+M">Micah Carroll</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Narang,+A">Adhyyan Narang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Weisser,+C">Constantin Weisser</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Murphy,+B">Brendan Murphy</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Dragan,+A">Anca Dragan</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> As LLMs become more widely deployed, there is increasing interest in directly optimizing for feedback from end users (e.g. thumbs up) in addition to feedback from paid annotators. However, training to maximize human feedback creates a perverse incentive structure for the AI to resort to manipulative or deceptive tactics to obtain positive feedback from users who are vulnerable to such strategies. We study this phenomenon by training LLMs with Reinforcement Learning with simulated user feedback in environments of practical LLM usage. In our settings, we find that: 1) Extreme forms of "feedback gaming" such as manipulation and deception are learned reliably; 2) Even if only 2% of users are vulnerable to manipulative strategies, LLMs learn to identify and target them while behaving appropriately with other users, making such behaviors harder to detect; 3) To mitigate this issue, it may seem promising to leverage continued safety training or LLM-as-judges during training to filter problematic outputs. Instead, we found that while such approaches help in some of our settings, they backfire in others, sometimes even leading to subtler manipulative behaviors. We hope our results can serve as a case study which highlights the risks of using gameable feedback sources -- such as user feedback -- as a target for RL. 
</p> </div> </dd> <dt> <a name='item598'>[598]</a> <a href ="/abs/2411.02775" title="Abstract" id="2411.02775"> arXiv:2411.02775 </a> (replaced) [<a href="/pdf/2411.02775" title="Download PDF" id="pdf-2411.02775" aria-labelledby="pdf-2411.02775">pdf</a>, <a href="https://arxiv.org/html/2411.02775v2" title="View HTML" id="html-2411.02775" aria-labelledby="html-2411.02775" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.02775" title="Other formats" id="oth-2411.02775" aria-labelledby="oth-2411.02775">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Winemaking: Extracting Essential Insights for Efficient Threat Detection in Audit Logs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+W">Weiheng Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qiao,+W">Wei Qiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yan,+W">Wenhao Yan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+B">Bo Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yuling Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+B">Baoxu Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+Z">Zhigang Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+J">JunRong Liu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages body, 11 pages total(without authors) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span> </div> <p class='mathjax'> Advanced Persistent Threats (APTs) are continuously evolving, leveraging their stealthiness and persistence to put increasing pressure on current provenance-based Intrusion Detection Systems (IDS). 
This evolution exposes several critical issues: (1) The dense interaction between malicious and benign nodes within provenance graphs introduces neighbor noise, hindering effective detection; (2) The complex prediction mechanisms of existing APT detection models lead to the insufficient utilization of prior knowledge embedded in the data; (3) The high computational cost makes detection impractical. <br>To address these challenges, we propose Winemaking, a lightweight threat detection system built on a knowledge distillation framework, capable of node-level detection within audit log provenance graphs. Specifically, Winemaking applies graph Laplacian regularization to reduce neighbor noise, obtaining smoothed and denoised graph signals. Subsequently, Winemaking employs a teacher model based on GNNs to extract knowledge, which is then distilled into a lightweight student model. The student model is designed as a trainable combination of a feature transformation module and a personalized PageRank random walk label propagation module, with the former capturing feature knowledge and the latter learning label and structural knowledge. After distillation, the student model benefits from the knowledge of the teacher model to perform precise threat detection. We evaluate Winemaking through extensive experiments on three public datasets and compare its performance against several state-of-the-art IDS solutions. The results demonstrate that Winemaking achieves outstanding detection accuracy across all scenarios and the detection time is 1.4 to 5.2 times faster than the current state-of-the-art methods. 
</p> </div> </dd> <dt> <a name='item599'>[599]</a> <a href ="/abs/2411.02853" title="Abstract" id="2411.02853"> arXiv:2411.02853 </a> (replaced) [<a href="/pdf/2411.02853" title="Download PDF" id="pdf-2411.02853" aria-labelledby="pdf-2411.02853">pdf</a>, <a href="https://arxiv.org/html/2411.02853v2" title="View HTML" id="html-2411.02853" aria-labelledby="html-2411.02853" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.02853" title="Other formats" id="oth-2411.02853" aria-labelledby="oth-2411.02853">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ADOPT: Modified Adam Can Converge with Any $\beta_2$ with the Optimal Rate </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Taniguchi,+S">Shohei Taniguchi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Harada,+K">Keno Harada</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Minegishi,+G">Gouki Minegishi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Oshima,+Y">Yuta Oshima</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jeong,+S+C">Seong Cheol Jeong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nagahara,+G">Go Nagahara</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Iiyama,+T">Tomoshi Iiyama</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Suzuki,+M">Masahiro Suzuki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Iwasawa,+Y">Yusuke Iwasawa</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Matsuo,+Y">Yutaka Matsuo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at Neural Information Processing Systems (NeurIPS 2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p 
class='mathjax'> Adam is one of the most popular optimization algorithms in deep learning. However, it is known that Adam does not converge in theory unless choosing a hyperparameter, i.e., $\beta_2$, in a problem-dependent manner. There have been many attempts to fix the non-convergence (e.g., AMSGrad), but they require an impractical assumption that the gradient noise is uniformly bounded. In this paper, we propose a new adaptive gradient method named ADOPT, which achieves the optimal convergence rate of $\mathcal{O} ( 1 / \sqrt{T} )$ with any choice of $\beta_2$ without depending on the bounded noise assumption. ADOPT addresses the non-convergence issue of Adam by removing the current gradient from the second moment estimate and changing the order of the momentum update and the normalization by the second moment estimate. We also conduct intensive numerical experiments, and verify that our ADOPT achieves superior results compared to Adam and its variants across a wide range of tasks, including image classification, generative modeling, natural language processing, and deep reinforcement learning. The implementation is available at <a href="https://github.com/iShohei220/adopt" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item600'>[600]</a> <a href ="/abs/2411.03346" title="Abstract" id="2411.03346"> arXiv:2411.03346 </a> (replaced) [<a href="/pdf/2411.03346" title="Download PDF" id="pdf-2411.03346" aria-labelledby="pdf-2411.03346">pdf</a>, <a href="https://arxiv.org/html/2411.03346v2" title="View HTML" id="html-2411.03346" aria-labelledby="html-2411.03346" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.03346" title="Other formats" id="oth-2411.03346" aria-labelledby="oth-2411.03346">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Fixing Security Vulnerabilities with AI in OSS-Fuzz </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yuntong Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+J">Jiawei Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Berzin,+D">Dominic Berzin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mirchev,+M">Martin Mirchev</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+D">Dongge Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Arya,+A">Abhishek Arya</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+O">Oliver Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Roychoudhury,+A">Abhik Roychoudhury</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Software Engineering (cs.SE) </div> <p class='mathjax'> Critical open source software systems undergo significant validation in the form of lengthy fuzz campaigns. The fuzz campaigns typically conduct a biased random search over the domain of program inputs, to find inputs which crash the software system. 
Such fuzzing is useful to enhance the security of software systems in general since even closed source software may use open source components. Hence testing open source software is of paramount importance. Currently OSS-Fuzz is the most significant and widely used infrastructure for continuous validation of open source systems. Unfortunately even though OSS-Fuzz has identified more than 10,000 vulnerabilities across 1000 or more software projects, the detected vulnerabilities may remain unpatched, as vulnerability fixing is often manual in practice. In this work, we rely on the recent progress in Large Language Model (LLM) agents for autonomous program improvement including bug fixing. We customise the well-known AutoCodeRover agent for fixing security vulnerabilities. This is because LLM agents like AutoCodeRover fix bugs from issue descriptions via code search. Instead for security patching, we rely on the test execution of the exploit input to extract code elements relevant to the fix. Our experience with OSS-Fuzz vulnerability data shows that LLM agent autonomy is useful for successful security patching, as opposed to approaches like Agentless where the control flow is fixed. More importantly our findings show that we cannot measure quality of patches by code similarity of the patch with reference codes (as in CodeBLEU scores used in VulMaster), since patches with high CodeBLEU scores still fail to pass the given exploit input. Our findings indicate that security patch correctness needs to consider dynamic attributes like test executions as opposed to relying on standard text/code similarity metrics. 
</p> </div> </dd> <dt> <a name='item601'>[601]</a> <a href ="/abs/2411.03416" title="Abstract" id="2411.03416"> arXiv:2411.03416 </a> (replaced) [<a href="/pdf/2411.03416" title="Download PDF" id="pdf-2411.03416" aria-labelledby="pdf-2411.03416">pdf</a>, <a href="https://arxiv.org/html/2411.03416v2" title="View HTML" id="html-2411.03416" aria-labelledby="html-2411.03416" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.03416" title="Other formats" id="oth-2411.03416" aria-labelledby="oth-2411.03416">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Accelerating Gaussian Variational Inference for Motion Planning Under Uncertainty </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+Z">Zinuo Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+H">Hongzhe Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vela,+P">Patricio Vela</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yongxin Chen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> This work addresses motion planning under uncertainty as a stochastic optimal control problem. The path distribution induced by the optimal controller corresponds to a posterior path distribution with a known form. To approximate this posterior, we frame an optimization problem in the space of Gaussian distributions, which aligns with the Gaussian Variational Inference Motion Planning (GVIMP) paradigm introduced in \cite{yu2023gaussian}. In this framework, the computation bottleneck lies in evaluating the expectation of collision costs over a dense discretized trajectory and computing the marginal covariances. 
This work exploits the sparse motion planning factor graph, which allows for parallel computing collision costs and Gaussian Belief Propagation (GBP) marginal covariance computation, to introduce a computationally efficient approach to solving GVIMP. We term the novel paradigm as the Parallel Gaussian Variational Inference Motion Planning (P-GVIMP). We validate the proposed framework on various robotic systems, demonstrating significant speed acceleration achieved by leveraging Graphics Processing Units (GPUs) for parallel computation. An open-sourced implementation is presented at <a href="https://github.com/hzyu17/VIMP" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item602'>[602]</a> <a href ="/abs/2411.03795" title="Abstract" id="2411.03795"> arXiv:2411.03795 </a> (replaced) [<a href="/pdf/2411.03795" title="Download PDF" id="pdf-2411.03795" aria-labelledby="pdf-2411.03795">pdf</a>, <a href="https://arxiv.org/html/2411.03795v2" title="View HTML" id="html-2411.03795" aria-labelledby="html-2411.03795" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.03795" title="Other formats" id="oth-2411.03795" aria-labelledby="oth-2411.03795">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> VQA$^2$: Visual Question Answering for Video Quality Assessment </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jia,+Z">Ziheng Jia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Z">Zicheng Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qian,+J">Jiaying Qian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+H">Haoning Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+W">Wei Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+C">Chunyi Li</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+X">Xiaohong Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+W">Weisi Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhai,+G">Guangtao Zhai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Min,+X">Xiongkuo Min</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 24 pages 12 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The advent and proliferation of large multi-modal models (LMMs) have introduced new paradigms to computer vision, transforming various tasks into a unified visual question answering framework. Video Quality Assessment (VQA), a classic field in low-level visual perception, focused initially on quantitative video quality scoring. However, driven by advances in LMMs, it is now progressing toward more holistic visual quality understanding tasks. Recent studies in the image domain have demonstrated that Visual Question Answering (VQA) can markedly enhance low-level visual quality evaluation. Nevertheless, related work has not been explored in the video domain, leaving substantial room for improvement. To address this gap, we introduce the VQA2 Instruction Dataset - the first visual question answering instruction dataset that focuses on video quality assessment. This dataset consists of 3 subsets and covers various video types, containing 157,755 instruction question-answer pairs. Then, leveraging this foundation, we present the VQA2 series models. The VQA2 series models interleave visual and motion tokens to enhance the perception of spatial-temporal quality details in videos. 
We conduct extensive experiments on video quality scoring and understanding tasks, and results demonstrate that the VQA2 series models achieve excellent performance in both tasks. Notably, our final model, the VQA2-Assistant, exceeds the renowned GPT-4o in visual quality understanding tasks while maintaining strong competitiveness in quality scoring tasks. Our work provides a foundation and feasible approach for integrating low-level video quality assessment and understanding with LMMs. </p> </div> </dd> <dt> <a name='item603'>[603]</a> <a href ="/abs/2411.03832" title="Abstract" id="2411.03832"> arXiv:2411.03832 </a> (replaced) [<a href="/pdf/2411.03832" title="Download PDF" id="pdf-2411.03832" aria-labelledby="pdf-2411.03832">pdf</a>, <a href="https://arxiv.org/html/2411.03832v2" title="View HTML" id="html-2411.03832" aria-labelledby="html-2411.03832" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.03832" title="Other formats" id="oth-2411.03832" aria-labelledby="oth-2411.03832">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Accelerating DNA Read Mapping with Digital Processing-in-Memory </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ben-Hur,+R">Rotem Ben-Hur</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Leitersdorf,+O">Orian Leitersdorf</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ronen,+R">Ronny Ronen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Goldshmidt,+L">Lidor Goldshmidt</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Magram,+I">Idan Magram</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kaplun,+L">Lior Kaplun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yavitz,+L">Leonid Yavitz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kvatinsky,+S">Shahar Kvatinsky</a></div> <div 
class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Hardware Architecture (cs.AR)</span>; Distributed, Parallel, and Cluster Computing (cs.DC); Quantitative Methods (q-bio.QM) </div> <p class='mathjax'> Genome analysis has revolutionized fields such as personalized medicine and forensics. Modern sequencing machines generate vast amounts of fragmented strings of genome data called reads. The alignment of these reads into a complete DNA sequence of an organism (the read mapping process) requires extensive data transfer between processing units and memory, leading to execution bottlenecks. Prior studies have primarily focused on accelerating specific stages of the read-mapping task. Conversely, this paper introduces a holistic framework called DART-PIM that accelerates the entire read-mapping process. DART-PIM facilitates digital processing-in-memory (PIM) for an end-to-end acceleration of the entire read-mapping process, from indexing using a unique data organization schema to filtering and read alignment with an optimized Wagner Fischer algorithm. A comprehensive performance evaluation with real genomic data shows that DART-PIM achieves a 5.7x and 257x improvement in throughput and a 92x and 27x energy efficiency enhancement compared to state-of-the-art GPU and PIM implementations, respectively. 
</p> </div> </dd> <dt> <a name='item604'>[604]</a> <a href ="/abs/2411.03962" title="Abstract" id="2411.03962"> arXiv:2411.03962 </a> (replaced) [<a href="/pdf/2411.03962" title="Download PDF" id="pdf-2411.03962" aria-labelledby="pdf-2411.03962">pdf</a>, <a href="https://arxiv.org/html/2411.03962v2" title="View HTML" id="html-2411.03962" aria-labelledby="html-2411.03962" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.03962" title="Other formats" id="oth-2411.03962" aria-labelledby="oth-2411.03962">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> How Does A Text Preprocessing Pipeline Affect Ontology Syntactic Matching? </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Qiang,+Z">Zhangcheng Qiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Taylor,+K">Kerry Taylor</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+W">Weiqing Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages, 26 figures, 4 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> The generic text preprocessing pipeline, comprising Tokenisation, Normalisation, Stop Words Removal, and Stemming/Lemmatisation, has been implemented in many ontology matching (OM) systems. However, the lack of standardisation in text preprocessing creates diversity in mapping results. In this paper, we investigate the effect of the text preprocessing pipeline on OM tasks at syntactic levels. 
Our experiments on 8 Ontology Alignment Evaluation Initiative (OAEI) track repositories with 49 distinct alignments indicate: (1) Tokenisation and Normalisation are currently more effective than Stop Words Removal and Stemming/Lemmatisation; and (2) The selection of Lemmatisation and Stemming is task-specific. We recommend standalone Lemmatisation or Stemming with post-hoc corrections. We find that (3) Porter Stemmer and Snowball Stemmer perform better than Lancaster Stemmer; and that (4) Part-of-Speech (POS) Tagging does not help Lemmatisation. To repair less effective Stop Words Removal and Stemming/Lemmatisation used in OM tasks, we propose a novel context-based pipeline repair approach that significantly improves matching correctness and overall matching performance. We also discuss the use of text preprocessing pipeline in the new era of large language models (LLMs). </p> </div> </dd> <dt> <a name='item605'>[605]</a> <a href ="/abs/2411.04462" title="Abstract" id="2411.04462"> arXiv:2411.04462 </a> (replaced) [<a href="/pdf/2411.04462" title="Download PDF" id="pdf-2411.04462" aria-labelledby="pdf-2411.04462">pdf</a>, <a href="/format/2411.04462" title="Other formats" id="oth-2411.04462" aria-labelledby="oth-2411.04462">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Can CDT rationalise the ex ante optimal policy via modified anthropics? 
</div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cooper,+E">Emery Cooper</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Oesterheld,+C">Caspar Oesterheld</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Conitzer,+V">Vincent Conitzer</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Computer Science and Game Theory (cs.GT) </div> <p class='mathjax'> In Newcomb's problem, causal decision theory (CDT) recommends two-boxing and thus comes apart from evidential decision theory (EDT) and ex ante policy optimisation (which prescribe one-boxing). However, in Newcomb's problem, you should perhaps believe that with some probability you are in a simulation run by the predictor to determine whether to put a million dollars into the opaque box. If so, then causal decision theory might recommend one-boxing in order to cause the predictor to fill the opaque box. In this paper, we study generalisations of this approach. That is, we consider general Newcomblike problems and try to form reasonable self-locating beliefs under which CDT's recommendations align with an EDT-like notion of ex ante policy optimisation. We consider approaches in which we model the world as running simulations of the agent, and an approach not based on such models (which we call 'Generalised Generalised Thirding', or GGT). For each approach, we characterise the resulting CDT policies, and prove that under certain conditions, these include the ex ante optimal policies. 
</p> </div> </dd> <dt> <a name='item606'>[606]</a> <a href ="/abs/2411.05449" title="Abstract" id="2411.05449"> arXiv:2411.05449 </a> (replaced) [<a href="/pdf/2411.05449" title="Download PDF" id="pdf-2411.05449" aria-labelledby="pdf-2411.05449">pdf</a>, <a href="/format/2411.05449" title="Other formats" id="oth-2411.05449" aria-labelledby="oth-2411.05449">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unmanned F/A-18 Aircraft Landing Control on Aircraft Carrier in Adverse Conditions </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Kistyarev,+M">Mikhail Kistyarev</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+X">Xinhua Wang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Systems and Control (eess.SY)</span> </div> <p class='mathjax'> Carrier landings are a difficult control task due to wind disturbances and a changing trajectory. Demand for carrier-based drones is increasing. A robust and accurate landing control system is crucial to meet this demand. Control performance can be improved by using observers to estimate unknown variables and disturbances for feedback. This study applies a nonlinear observer to estimate the combined disturbance in the pitch dynamics of an F/A-18 during carrier landing. Additionally, controllers to regulate the velocity, rate of descent and vertical position are designed. A full model, including the nonlinear flight dynamics, controller, carrier deck motion, wind and measurement noise is modelled numerically and implemented in software. Combined with proportional derivative control, the proposed pitch control method is shown to be very effective, converging 85% faster than a PID controller. The simulations verify that the pitch controller can quickly track a time-varying reference despite noise and disturbances. 
The positional controller used is found to be ineffective and requires improvement. </p> </div> </dd> <dt> <a name='item607'>[607]</a> <a href ="/abs/2411.05930" title="Abstract" id="2411.05930"> arXiv:2411.05930 </a> (replaced) [<a href="/pdf/2411.05930" title="Download PDF" id="pdf-2411.05930" aria-labelledby="pdf-2411.05930">pdf</a>, <a href="https://arxiv.org/html/2411.05930v2" title="View HTML" id="html-2411.05930" aria-labelledby="html-2411.05930" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.05930" title="Other formats" id="oth-2411.05930" aria-labelledby="oth-2411.05930">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> BERTrend: Neural Topic Modeling for Emerging Trends Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Boutaleb,+A">Allaa Boutaleb</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Picault,+J">Jerome Picault</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Grosjean,+G">Guillaume Grosjean</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 17 pages, 12 figures, FuturED 2024: Workshop on Future of Event Detection (CoLocated with EMNLP 2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Information Retrieval (cs.IR) </div> <p class='mathjax'> Detecting and tracking emerging trends and weak signals in large, evolving text corpora is vital for applications such as monitoring scientific literature, managing brand reputation, surveilling critical infrastructure and more generally to any kind of text-based event detection. Existing solutions often fail to capture the nuanced context or dynamically track evolving patterns over time. 
BERTrend, a novel method, addresses these limitations using neural topic modeling in an online setting. It introduces a new metric to quantify topic popularity over time by considering both the number of documents and update frequency. This metric classifies topics as noise, weak, or strong signals, flagging emerging, rapidly growing topics for further investigation. Experimentation on two large real-world datasets demonstrates BERTrend's ability to accurately detect and track meaningful weak signals while filtering out noise, offering a comprehensive solution for monitoring emerging trends in large-scale, evolving text corpora. The method can also be used for retrospective analysis of past events. In addition, the use of Large Language Models together with BERTrend offers efficient means for the interpretability of trends of events. </p> </div> </dd> <dt> <a name='item608'>[608]</a> <a href ="/abs/2411.06735" title="Abstract" id="2411.06735"> arXiv:2411.06735 </a> (replaced) [<a href="/pdf/2411.06735" title="Download PDF" id="pdf-2411.06735" aria-labelledby="pdf-2411.06735">pdf</a>, <a href="https://arxiv.org/html/2411.06735v2" title="View HTML" id="html-2411.06735" aria-labelledby="html-2411.06735" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.06735" title="Other formats" id="oth-2411.06735" aria-labelledby="oth-2411.06735">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi-Modal Forecaster: Jointly Predicting Time Series and Textual Data </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+K">Kai Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tsai,+H">Howard Tsai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sen,+R">Rajat Sen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Das,+A">Abhimanyu Das</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+Z">Zihao Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tanpure,+A">Abhishek Tanpure</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+M">Mathew Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+R">Rose Yu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 21 pages, 4 tables, 2 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span> </div> <p class='mathjax'> Current forecasting approaches are largely unimodal and ignore the rich textual data that often accompany the time series due to the lack of well-curated multimodal benchmark datasets. In this work, we develop TimeText Corpus (TTC), a carefully curated, time-aligned text and time dataset for multimodal forecasting. Our dataset is composed of sequences of numbers and text aligned to timestamps, and includes data from two different domains: climate science and healthcare. Our data is a significant contribution to the rare selection of available multimodal datasets. We also propose the Hybrid Multi-Modal Forecaster (Hybrid-MMF), a multimodal LLM that jointly forecasts both text and time series data using shared embeddings. However, contrary to our expectations, our Hybrid-MMF model does not outperform existing baselines in our experiments. This negative result highlights the challenges inherent in multimodal forecasting. Our code and data are available at <a href="https://github.com/Rose-STL-Lab/Multimodal_Forecasting" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item609'>[609]</a> <a href ="/abs/2411.08499" title="Abstract" id="2411.08499"> arXiv:2411.08499 </a> (replaced) [<a href="/pdf/2411.08499" title="Download PDF" id="pdf-2411.08499" aria-labelledby="pdf-2411.08499">pdf</a>, <a href="https://arxiv.org/html/2411.08499v2" title="View HTML" id="html-2411.08499" aria-labelledby="html-2411.08499" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.08499" title="Other formats" id="oth-2411.08499" aria-labelledby="oth-2411.08499">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning Robust Grasping Strategy Through Tactile Sensing and Adaption Skill </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+Y">Yueming Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+M">Mengde Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+S">Songhua Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xuetao Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+S">Sheng Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+M">Miao Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span> </div> <p class='mathjax'> Robust grasping represents an essential task in robotics, necessitating tactile feedback and reactive grasping adjustments for robust grasping of objects. Previous research has extensively combined tactile sensing with grasping, primarily relying on rule-based approaches, frequently neglecting post-grasping difficulties such as external disruptions or inherent uncertainties of the object's physics and geometry. 
To address these limitations, this paper introduces a human-demonstration-based adaptive grasping policy based on tactile sensing, which aims to achieve robust gripping while resisting disturbances to maintain grasp stability. Our trained model generalizes to daily objects with seven different sizes, shapes, and textures. Experimental results demonstrate that our method performs well in dynamic and force interaction tasks and exhibits excellent generalization ability. </p> </div> </dd> <dt> <a name='item610'>[610]</a> <a href ="/abs/2411.09263" title="Abstract" id="2411.09263"> arXiv:2411.09263 </a> (replaced) [<a href="/pdf/2411.09263" title="Download PDF" id="pdf-2411.09263" aria-labelledby="pdf-2411.09263">pdf</a>, <a href="https://arxiv.org/html/2411.09263v2" title="View HTML" id="html-2411.09263" aria-labelledby="html-2411.09263" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.09263" title="Other formats" id="oth-2411.09263" aria-labelledby="oth-2411.09263">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Rethinking Weight-Averaged Model-merging </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Hu Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+C">Congbo Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Almakky,+I">Ibrahim Almakky</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Reid,+I">Ian Reid</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Carneiro,+G">Gustavo Carneiro</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yaqub,+M">Mohammad Yaqub</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Weight-averaged model-merging has emerged as a powerful approach in deep 
learning, capable of enhancing model performance without fine-tuning or retraining. However, the underlying mechanisms that explain its effectiveness remain largely unexplored. In this paper, we investigate this technique from three novel perspectives to provide deeper insights into how and why weight-averaged model-merging works: (1) we examine the intrinsic patterns captured by the learning of the model weights, through the visualizations of their patterns on several datasets, showing that these weights often encode structured and interpretable patterns; (2) we investigate model ensemble merging strategies based on averaging on weights versus averaging on features, providing detailed analyses across diverse architectures and datasets; and (3) we explore the impact on model-merging prediction stability in terms of changing the parameter magnitude, revealing insights into the way weight averaging works as regularization by showing the robustness across different parameter scales. Our findings shed light on the "black box" of weight-averaged model-merging, offering valuable insights and practical recommendations that advance the model-merging process. 
</p> </div> </dd> <dt> <a name='item611'>[611]</a> <a href ="/abs/2411.09543" title="Abstract" id="2411.09543"> arXiv:2411.09543 </a> (replaced) [<a href="/pdf/2411.09543" title="Download PDF" id="pdf-2411.09543" aria-labelledby="pdf-2411.09543">pdf</a>, <a href="https://arxiv.org/html/2411.09543v2" title="View HTML" id="html-2411.09543" aria-labelledby="html-2411.09543" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.09543" title="Other formats" id="oth-2411.09543" aria-labelledby="oth-2411.09543">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> OpenGeMM: A High-Utilization GeMM Accelerator Generator with Lightweight RISC-V Control and Tight Memory Coupling </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yi,+X">Xiaoling Yi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Antonio,+R">Ryan Antonio</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dumoulin,+J">Joren Dumoulin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+J">Jiacong Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Van+Delm,+J">Josse Van Delm</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Paim,+G">Guilherme Paim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Verhelst,+M">Marian Verhelst</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Hardware Architecture (cs.AR)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Deep neural networks (DNNs) face significant challenges when deployed on resource-constrained extreme edge devices due to their computational and data-intensive nature. 
While standalone accelerators tailored for specific application scenarios suffer from inflexible control and limited programmability, generic hardware acceleration platforms coupled with RISC-V CPUs can enable high reusability and flexibility, yet typically at the expense of system level efficiency and low utilization. To fill this gap, we propose OpenGeMM, an open-source acceleration platform, jointly demonstrating high efficiency and utilization, as well as ease of configurability and programmability. OpenGeMM encompasses a parameterized Chisel-coded GeMM accelerator, a lightweight RISC-V processor, and a tightly coupled multi-banked scratchpad memory. The GeMM core utilization and system efficiency are boosted through three mechanisms: configuration pre-loading, input pre-fetching with output buffering, and programmable strided memory access. Experimental results show that OpenGeMM can consistently achieve hardware utilization ranging from 81.89% to 99.34% across diverse CNN and Transformer workloads. Compared to the SotA open-source Gemmini accelerator, OpenGeMM demonstrates a 3.58x to 16.40x speedup on normalized throughput across a wide variety of GeMM workloads, while achieving 4.68 TOPS/W system efficiency. 
</p> </div> </dd> <dt> <a name='item612'>[612]</a> <a href ="/abs/2411.09955" title="Abstract" id="2411.09955"> arXiv:2411.09955 </a> (replaced) [<a href="/pdf/2411.09955" title="Download PDF" id="pdf-2411.09955" aria-labelledby="pdf-2411.09955">pdf</a>, <a href="https://arxiv.org/html/2411.09955v2" title="View HTML" id="html-2411.09955" aria-labelledby="html-2411.09955" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.09955" title="Other formats" id="oth-2411.09955" aria-labelledby="oth-2411.09955">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Instruction-Guided Editing Controls for Images and Multimedia: A Survey in LLM era </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+T+T">Thanh Tam Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ren,+Z">Zhao Ren</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pham,+T">Trinh Pham</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huynh,+T+T">Thanh Trung Huynh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+P+L">Phi Le Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+H">Hongzhi Yin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+Q+V+H">Quoc Viet Hung Nguyen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Fixed a serious error in author information </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Human-Computer Interaction (cs.HC); Machine Learning (cs.LG); Multimedia (cs.MM) </div> <p class='mathjax'> The rapid advancement of large language models (LLMs) and multimodal learning has transformed digital content creation and manipulation. 
Traditional visual editing tools require significant expertise, limiting accessibility. Recent strides in instruction-based editing have enabled intuitive interaction with visual content, using natural language as a bridge between user intent and complex editing operations. This survey provides an overview of these techniques, focusing on how LLMs and multimodal models empower users to achieve precise visual modifications without deep technical knowledge. By synthesizing over 100 publications, we explore methods from generative adversarial networks to diffusion models, examining multimodal integration for fine-grained content control. We discuss practical applications across domains such as fashion, 3D scene manipulation, and video synthesis, highlighting increased accessibility and alignment with human intuition. Our survey compares existing literature, emphasizing LLM-empowered editing, and identifies key challenges to stimulate further research. We aim to democratize powerful visual editing across various industries, from entertainment to education. Interested readers are encouraged to access our repository at <a href="https://github.com/tamlhp/awesome-instruction-editing" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item613'>[613]</a> <a href ="/abs/2411.10003" title="Abstract" id="2411.10003"> arXiv:2411.10003 </a> (replaced) [<a href="/pdf/2411.10003" title="Download PDF" id="pdf-2411.10003" aria-labelledby="pdf-2411.10003">pdf</a>, <a href="https://arxiv.org/html/2411.10003v2" title="View HTML" id="html-2411.10003" aria-labelledby="html-2411.10003" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.10003" title="Other formats" id="oth-2411.10003" aria-labelledby="oth-2411.10003">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Pro-Prophet: A Systematic Load Balancing Method for Efficient Parallel Training of Large-scale MoE Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+W">Wei Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lai,+Z">Zhiquan Lai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+S">Shengwei Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+W">Weijie Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ge,+K">Keshi Ge</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shen,+A">Ao Shen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Su,+H">Huayou Su</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+D">Dongsheng Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Distributed, Parallel, and Cluster Computing (cs.DC)</span> </div> <p class='mathjax'> The size of deep learning models has been increasing to enhance model quality. The linear increase in training computation budget with model size means that training an extremely large-scale model is exceedingly time-consuming. 
Recently, the Mixture of Expert (MoE) has drawn significant attention as it can scale models to extra-large sizes with a stable computation budget. However, inefficient distributed training of large-scale MoE models hinders their broader application. Specifically, a considerable dynamic load imbalance occurs among devices during training, significantly reducing throughput. Several load-balancing works have been proposed to address the challenge. System-level solutions draw more attention for their hardware affinity and non-disruption of model convergence compared to algorithm-level ones. However, they are troubled by high communication costs and poor communication-computation overlapping. To address these challenges, we propose a systematic load-balancing method, Pro-Prophet, which consists of a planner and a scheduler for efficient parallel training of large-scale MoE models. To adapt to the dynamic load imbalance, we profile training statistics and use them to design Pro-Prophet. For lower communication volume, Pro-Prophet planner determines a series of lightweight load-balancing strategies and efficiently searches for a communication-efficient one for training based on the statistics. For sufficient overlapping of communication and computation, Pro-Prophet scheduler schedules the data-dependent operations based on the statistics and operation features, further improving the training throughput. Experimental results indicate that Pro-Prophet achieves up to 2.66x speedup compared to Deepspeed-MoE and FasterMoE. Additionally, Pro-Prophet achieves a load-balancing enhancement of up to 11.01x when compared to FasterMoE. 
</p> </div> </dd> <dt> <a name='item614'>[614]</a> <a href ="/abs/2411.10028" title="Abstract" id="2411.10028"> arXiv:2411.10028 </a> (replaced) [<a href="/pdf/2411.10028" title="Download PDF" id="pdf-2411.10028" aria-labelledby="pdf-2411.10028">pdf</a>, <a href="https://arxiv.org/html/2411.10028v2" title="View HTML" id="html-2411.10028" aria-labelledby="html-2411.10028" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.10028" title="Other formats" id="oth-2411.10028" aria-labelledby="oth-2411.10028">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MOT FCG++: Enhanced Representation of Spatio-temporal Motion and Appearance Features </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Fang,+Y">Yanzhao Fang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 14 pages, 7 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The goal of multi-object tracking (MOT) is to detect and track all objects in a scene across frames, while maintaining a unique identity for each object. Most existing methods rely on the spatial-temporal motion features and appearance embedding features of the detected objects in consecutive frames. Effectively and robustly representing the spatial and appearance features of long trajectories has become a critical factor affecting the performance of MOT. We propose a novel approach for appearance and spatial-temporal motion feature representation, improving upon the hierarchical clustering association method MOT FCG. For spatial-temporal motion features, we first propose Diagonal Modulated GIoU, which more accurately represents the relationship between the position and shape of the objects. 
Second, Mean Constant Velocity Modeling is proposed to reduce the effect of observation noise on target motion state estimation. For appearance features, we utilize a dynamic appearance representation that incorporates confidence information, enabling the trajectory appearance features to be more robust and global. Based on the baseline model MOT FCG, we have realized further improvements in the performance of all. We achieved 63.1 HOTA, 76.9 MOTA and 78.2 IDF1 on the MOT17 test set, and also achieved competitive performance on the MOT20 and DanceTrack sets. </p> </div> </dd> <dt> <a name='item615'>[615]</a> <a href ="/abs/2411.10346" title="Abstract" id="2411.10346"> arXiv:2411.10346 </a> (replaced) [<a href="/pdf/2411.10346" title="Download PDF" id="pdf-2411.10346" aria-labelledby="pdf-2411.10346">pdf</a>, <a href="https://arxiv.org/html/2411.10346v2" title="View HTML" id="html-2411.10346" aria-labelledby="html-2411.10346" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.10346" title="Other formats" id="oth-2411.10346" aria-labelledby="oth-2411.10346">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> BiDense: Binarization for Dense Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+R">Rui Yin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qin,+H">Haotong Qin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yulun Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+W">Wenbo Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+Y">Yong Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+J">Jianjun Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+C">Cheng Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jia,+B">Biao Jia</a></div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Dense prediction is a critical task in computer vision. However, previous methods often require extensive computational resources, which hinders their real-world application. In this paper, we propose BiDense, a generalized binary neural network (BNN) designed for efficient and accurate dense prediction tasks. BiDense incorporates two key techniques: the Distribution-adaptive Binarizer (DAB) and the Channel-adaptive Full-precision Bypass (CFB). The DAB adaptively calculates thresholds and scaling factors for binarization, effectively retaining more information within BNNs. Meanwhile, the CFB facilitates full-precision bypassing for binary convolutional layers undergoing various channel size transformations, which enhances the propagation of real-valued signals and minimizes information loss. By leveraging these techniques, BiDense preserves more real-valued information, enabling more accurate and detailed dense predictions in BNNs. Extensive experiments demonstrate that our framework achieves performance levels comparable to full-precision models while significantly reducing memory usage and computational costs. 
</p> </div> </dd> <dt> <a name='item616'>[616]</a> <a href ="/abs/2411.10446" title="Abstract" id="2411.10446"> arXiv:2411.10446 </a> (replaced) [<a href="/pdf/2411.10446" title="Download PDF" id="pdf-2411.10446" aria-labelledby="pdf-2411.10446">pdf</a>, <a href="https://arxiv.org/html/2411.10446v2" title="View HTML" id="html-2411.10446" aria-labelledby="html-2411.10446" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.10446" title="Other formats" id="oth-2411.10446" aria-labelledby="oth-2411.10446">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> VeriGraph: Scene Graphs for Execution Verifiable Robot Planning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ekpo,+D">Daniel Ekpo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Levy,+M">Mara Levy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Suri,+S">Saksham Suri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huynh,+C">Chuong Huynh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shrivastava,+A">Abhinav Shrivastava</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Recent advancements in vision-language models (VLMs) offer potential for robot task planning, but challenges remain due to VLMs' tendency to generate incorrect action sequences. To address these limitations, we propose VeriGraph, a novel framework that integrates VLMs for robotic planning while verifying action feasibility. VeriGraph employs scene graphs as an intermediate representation, capturing key objects and spatial relationships to improve plan verification and refinement. 
The system generates a scene graph from input images and uses it to iteratively check and correct action sequences generated by an LLM-based task planner, ensuring constraints are respected and actions are executable. Our approach significantly enhances task completion rates across diverse manipulation scenarios, outperforming baseline methods by 58% for language-based tasks and 30% for image-based tasks. </p> </div> </dd> <dt> <a name='item617'>[617]</a> <a href ="/abs/2411.10588" title="Abstract" id="2411.10588"> arXiv:2411.10588 </a> (replaced) [<a href="/pdf/2411.10588" title="Download PDF" id="pdf-2411.10588" aria-labelledby="pdf-2411.10588">pdf</a>, <a href="/format/2411.10588" title="Other formats" id="oth-2411.10588" aria-labelledby="oth-2411.10588">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A dataset of questions on decision-theoretic reasoning in Newcomb-like problems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Oesterheld,+C">Caspar Oesterheld</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cooper,+E">Emery Cooper</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kodama,+M">Miles Kodama</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+L+C">Linh Chi Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Perez,+E">Ethan Perez</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 48 pages, 15 figures; code and data at <a href="https://github.com/casparoe/newcomblike_questions_dataset" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> We introduce a dataset of natural-language questions in the 
decision theory of so-called Newcomb-like problems. Newcomb-like problems include, for instance, decision problems in which an agent interacts with a similar other agent, and thus has to reason about the fact that the other agent will likely reason in similar ways. Evaluating LLM reasoning about Newcomb-like problems is important because interactions between foundation-model-based agents will often be Newcomb-like. Some ways of reasoning about Newcomb-like problems may allow for greater cooperation between models. <br>Our dataset contains both capabilities questions (i.e., questions with a unique, uncontroversially correct answer) and attitude questions (i.e., questions about which decision theorists would disagree). We use our dataset for an investigation of decision-theoretical capabilities and expressed attitudes and their interplay in existing models (different models by OpenAI, Anthropic, Meta, GDM, Reka, etc.), as well as models under simple prompt-based interventions. We find, among other things, that attitudes vary significantly between existing models; that high capabilities are associated with attitudes more favorable toward so-called evidential decision theory; and that attitudes are consistent across different types of questions. 
</p> </div> </dd> <dt> <a name='item618'>[618]</a> <a href ="/abs/2411.10718" title="Abstract" id="2411.10718"> arXiv:2411.10718 </a> (replaced) [<a href="/pdf/2411.10718" title="Download PDF" id="pdf-2411.10718" aria-labelledby="pdf-2411.10718">pdf</a>, <a href="https://arxiv.org/html/2411.10718v3" title="View HTML" id="html-2411.10718" aria-labelledby="html-2411.10718" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.10718" title="Other formats" id="oth-2411.10718" aria-labelledby="oth-2411.10718">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Transforming Teacher Education in Developing Countries: The Role of Generative AI in Bridging Theory and Practice </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nyaaba,+M">Matthew Nyaaba</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computers and Society (cs.CY)</span> </div> <p class='mathjax'> This study examines the transformative potential of Generative AI (GenAI) in teacher education within developing countries, focusing on Ghana, where challenges such as limited pedagogical modeling, performance-based assessments, and practitioner-expertise gaps hinder progress. GenAI has the capacity to address these issues by supporting content knowledge acquisition, a role that currently dominates teacher education programs. By taking on this foundational role, GenAI allows teacher educators to redirect their focus to other critical areas, including pedagogical modeling, authentic assessments, and fostering digital literacy and critical thinking. These roles are interconnected, creating a ripple effect where pre-service teachers (PSTs) are better equipped to enhance K-12 learning outcomes and align education with workforce needs. 
The study emphasizes that GenAI's roles are multifaceted, directly addressing resistance to change, improving resource accessibility, and supporting teacher professional development. However, it cautions against misuse, which could undermine critical thinking and creativity, essential skills nurtured through traditional teaching methods. To ensure responsible and effective integration, the study advocates a scaffolding approach to GenAI literacy. This includes educating PSTs on its supportive role, training them in ethical use and prompt engineering, and equipping them to critically assess AI-generated content for biases and validity. The study concludes by recommending empirical research to explore these roles further and develop practical steps for integrating GenAI into teacher education systems responsibly and effectively. </p> </div> </dd> <dt> <a name='item619'>[619]</a> <a href ="/abs/2411.11053" title="Abstract" id="2411.11053"> arXiv:2411.11053 </a> (replaced) [<a href="/pdf/2411.11053" title="Download PDF" id="pdf-2411.11053" aria-labelledby="pdf-2411.11053">pdf</a>, <a href="https://arxiv.org/html/2411.11053v3" title="View HTML" id="html-2411.11053" aria-labelledby="html-2411.11053" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.11053" title="Other formats" id="oth-2411.11053" aria-labelledby="oth-2411.11053">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SRA-MCTS: Self-driven Reasoning Augmentation with Monte Carlo Tree Search for Code Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+B">Bin Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Y">Yiguan Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yinghao Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+Y">Yang Gao</a></div> <div class='list-subjects'><span 
class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Large language models demonstrate exceptional performance in simple code generation tasks but still face challenges in tackling complex problems. These challenges may stem from insufficient reasoning and problem decomposition capabilities. To address this issue, we propose a reasoning-augmented data generation process, SRA-MCTS, which guides the model to autonomously generate high-quality intermediate reasoning paths. This creates a positive feedback loop, enabling continuous improvement. Our method operates entirely through the model itself without requiring additional supervision. By synthesizing natural language reasoning paths and translating them into executable code, the approach ensures analytical accuracy and enhances the success rate in solving complex tasks. Experimental results show that, even without additional supervisory signals, our method achieves performance improvements across different model scales, demonstrating the significant potential of self-improvement in small models. Furthermore, the method remains robust when traditional Chain-of-Thought (CoT) approaches exhibit performance degradation, with notable improvements observed in diversity metrics such as pass@10. We encourage further exploration of reasoning processes within training data to enhance the ability of language models to address complex problems. 
</p> </div> </dd> <dt> <a name='item620'>[620]</a> <a href ="/abs/2411.11240" title="Abstract" id="2411.11240"> arXiv:2411.11240 </a> (replaced) [<a href="/pdf/2411.11240" title="Download PDF" id="pdf-2411.11240" aria-labelledby="pdf-2411.11240">pdf</a>, <a href="https://arxiv.org/html/2411.11240v2" title="View HTML" id="html-2411.11240" aria-labelledby="html-2411.11240" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.11240" title="Other formats" id="oth-2411.11240" aria-labelledby="oth-2411.11240">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Controlling Diversity at Inference: Guiding Diffusion Recommender Models with Targeted Category Preferences </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Han,+G">Gwangseok Han</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kweon,+W">Wonbin Kweon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+M">Minsoo Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+H">Hwanjo Yu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> KDD 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Retrieval (cs.IR)</span> </div> <p class='mathjax'> Diversity control is an important task to alleviate bias amplification and filter bubble problems. The desired degree of diversity may fluctuate based on users' daily moods or business strategies. However, existing methods for controlling diversity often lack flexibility, as diversity is decided during training and cannot be easily modified during inference. We propose \textbf{D3Rec} (\underline{D}isentangled \underline{D}iffusion model for \underline{D}iversified \underline{Rec}ommendation), an end-to-end method that controls the accuracy-diversity trade-off at inference. 
D3Rec meets our three desiderata by (1) generating recommendations based on category preferences, (2) controlling category preferences during the inference phase, and (3) adapting to arbitrary targeted category preferences. In the forward process, D3Rec removes category preferences lurking in user interactions by adding noises. Then, in the reverse process, D3Rec generates recommendations through denoising steps while reflecting desired category preferences. Extensive experiments on real-world and synthetic datasets validate the effectiveness of D3Rec in controlling diversity at inference. </p> </div> </dd> <dt> <a name='item621'>[621]</a> <a href ="/abs/2411.11581" title="Abstract" id="2411.11581"> arXiv:2411.11581 </a> (replaced) [<a href="/pdf/2411.11581" title="Download PDF" id="pdf-2411.11581" aria-labelledby="pdf-2411.11581">pdf</a>, <a href="https://arxiv.org/html/2411.11581v2" title="View HTML" id="html-2411.11581" aria-labelledby="html-2411.11581" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.11581" title="Other formats" id="oth-2411.11581" aria-labelledby="oth-2411.11581">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> OASIS: Open Agents Social Interaction Simulations on One Million Agents </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Z">Ziyi Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Z">Zaibin Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+Z">Zirui Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+Y">Yuxian Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gan,+Z">Ziyue Gan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zhiyu Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ling,+Z">Zijian Ling</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jinsong Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+M">Martz Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dong,+B">Bowen Dong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gupta,+P">Prateek Gupta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+S">Shuyue Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+Z">Zhenfei Yin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+G">Guohao Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jia,+X">Xu Jia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+L">Lijun Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ghanem,+B">Bernard Ghanem</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+H">Huchuan Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ouyang,+W">Wanli Ouyang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qiao,+Y">Yu Qiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Torr,+P">Philip Torr</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shao,+J">Jing Shao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> There has been a growing interest in enhancing rule-based agent-based models (ABMs) for social media platforms (i.e., X, Reddit) with more realistic large language model (LLM) agents, thereby allowing for a more nuanced study of complex systems. As a result, several LLM-based ABMs have been proposed in the past year. While they hold promise, each simulator is specifically designed to study a particular scenario, making it time-consuming and resource-intensive to explore other phenomena using the same ABM. 
Additionally, these models simulate only a limited number of agents, whereas real-world social media platforms involve millions of users. To this end, we propose OASIS, a generalizable and scalable social media simulator. OASIS is designed based on real-world social media platforms, incorporating dynamically updated environments (i.e., dynamic social networks and post information), diverse action spaces (i.e., following, commenting), and recommendation systems (i.e., interest-based and hot-score-based). Additionally, OASIS supports large-scale user simulations, capable of modeling up to one million users. With these features, OASIS can be easily extended to different social media platforms to study large-scale group phenomena and behaviors. We replicate various social phenomena, including information spreading, group polarization, and herd effects across X and Reddit platforms. Moreover, we provide observations of social phenomena at different agent group scales. We observe that the larger agent group scale leads to more enhanced group dynamics and more diverse and helpful agents' opinions. These findings demonstrate OASIS's potential as a powerful tool for studying complex systems in digital environments. 
</p> </div> </dd> <dt> <a name='item622'>[622]</a> <a href ="/abs/2411.11853" title="Abstract" id="2411.11853"> arXiv:2411.11853 </a> (replaced) [<a href="/pdf/2411.11853" title="Download PDF" id="pdf-2411.11853" aria-labelledby="pdf-2411.11853">pdf</a>, <a href="https://arxiv.org/html/2411.11853v2" title="View HTML" id="html-2411.11853" aria-labelledby="html-2411.11853" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.11853" title="Other formats" id="oth-2411.11853" aria-labelledby="oth-2411.11853">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Chat Bankman-Fried: an Exploration of LLM Alignment in Finance </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Biancotti,+C">Claudia Biancotti</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Camassa,+C">Carolina Camassa</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Coletta,+A">Andrea Coletta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Giudice,+O">Oliver Giudice</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Glielmo,+A">Aldo Glielmo</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computers and Society (cs.CY)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); General Finance (q-fin.GN) </div> <p class='mathjax'> Advancements in large language models (LLMs) have renewed concerns about AI alignment - the consistency between human and AI goals and values. As various jurisdictions enact legislation on AI safety, the concept of alignment must be defined and measured across different domains. This paper proposes an experimental framework to assess whether LLMs adhere to ethical and legal standards in the relatively unexplored context of finance. 
We prompt nine LLMs to impersonate the CEO of a financial institution and test their willingness to misuse customer assets to repay outstanding corporate debt. Beginning with a baseline configuration, we adjust preferences, incentives and constraints, analyzing the impact of each adjustment with logistic regression. Our findings reveal significant heterogeneity in the baseline propensity for unethical behavior of LLMs. Factors such as risk aversion, profit expectations, and regulatory environment consistently influence misalignment in ways predicted by economic theory, although the magnitude of these effects varies across LLMs. This paper highlights both the benefits and limitations of simulation-based, ex post safety testing. While it can inform financial authorities and institutions aiming to ensure LLM safety, there is a clear trade-off between generality and cost. </p> </div> </dd> <dt> <a name='item623'>[623]</a> <a href ="/abs/2411.12089" title="Abstract" id="2411.12089"> arXiv:2411.12089 </a> (replaced) [<a href="/pdf/2411.12089" title="Download PDF" id="pdf-2411.12089" aria-labelledby="pdf-2411.12089">pdf</a>, <a href="https://arxiv.org/html/2411.12089v2" title="View HTML" id="html-2411.12089" aria-labelledby="html-2411.12089" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.12089" title="Other formats" id="oth-2411.12089" aria-labelledby="oth-2411.12089">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FruitNinja: 3D Object Interior Texture Generation with Gaussian Splatting </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+F">Fangyu Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yuhao Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Graphics (cs.GR); Human-Computer 
Interaction (cs.HC) </div> <p class='mathjax'> In the real world, objects reveal internal textures when sliced or cut, yet this behavior is not well-studied in 3D generation tasks today. For example, slicing a virtual 3D watermelon should reveal flesh and seeds. Given that no available dataset captures an object's full internal structure and collecting data from all slices is impractical, generative methods become the obvious approach. However, current 3D generation and inpainting methods often focus on visible appearance and overlook internal textures. To bridge this gap, we introduce FruitNinja, the first method to generate internal textures for 3D objects undergoing geometric and topological changes. Our approach produces objects via 3D Gaussian Splatting (3DGS) with both surface and interior textures synthesized, enabling real-time slicing and rendering without additional optimization. FruitNinja leverages a pre-trained diffusion model to progressively inpaint cross-sectional views and applies voxel-grid-based smoothing to achieve cohesive textures throughout the object. Our OpaqueAtom GS strategy overcomes 3DGS limitations by employing densely distributed opaque Gaussians, avoiding biases toward larger particles that destabilize training and sharp color transitions for fine-grained textures. Experimental results show that FruitNinja substantially outperforms existing approaches, showcasing unmatched visual quality in real-time rendered internal views across arbitrary geometry manipulations. 
</p> </div> </dd> <dt> <a name='item624'>[624]</a> <a href ="/abs/2411.12248" title="Abstract" id="2411.12248"> arXiv:2411.12248 </a> (replaced) [<a href="/pdf/2411.12248" title="Download PDF" id="pdf-2411.12248" aria-labelledby="pdf-2411.12248">pdf</a>, <a href="https://arxiv.org/html/2411.12248v2" title="View HTML" id="html-2411.12248" aria-labelledby="html-2411.12248" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.12248" title="Other formats" id="oth-2411.12248" aria-labelledby="oth-2411.12248">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Neuro-3D: Towards 3D Visual Decoding from EEG Signals </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+Z">Zhanqiang Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+J">Jiamin Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+Y">Yonghao Song</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bu,+J">Jiahui Bu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mai,+W">Weijian Mai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+Q">Qihao Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ouyang,+W">Wanli Ouyang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+C">Chunfeng Song</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Human's perception of the visual world is shaped by the stereo processing of 3D information. Understanding how the brain perceives and processes 3D visual stimuli in the real world has been a longstanding endeavor in neuroscience. 
Towards this goal, we introduce a new neuroscience task: decoding 3D visual perception from EEG signals, a neuroimaging technique that enables real-time monitoring of neural dynamics enriched with complex visual cues. To provide the essential benchmark, we first present EEG-3D, a pioneering dataset featuring multimodal analysis data and extensive EEG recordings from 12 subjects viewing 72 categories of 3D objects rendered in both videos and images. Furthermore, we propose Neuro-3D, a 3D visual decoding framework based on EEG signals. This framework adaptively integrates EEG features derived from static and dynamic stimuli to learn complementary and robust neural representations, which are subsequently utilized to recover both the shape and color of 3D objects through the proposed diffusion-based colored point cloud decoder. To the best of our knowledge, we are the first to explore EEG-based 3D visual decoding. Experiments indicate that Neuro-3D not only reconstructs colored 3D objects with high fidelity, but also learns effective neural representations that enable insightful brain region analysis. The dataset and associated code will be made publicly available. 
</p> </div> </dd> <dt> <a name='item625'>[625]</a> <a href ="/abs/2411.12641" title="Abstract" id="2411.12641"> arXiv:2411.12641 </a> (replaced) [<a href="/pdf/2411.12641" title="Download PDF" id="pdf-2411.12641" aria-labelledby="pdf-2411.12641">pdf</a>, <a href="/format/2411.12641" title="Other formats" id="oth-2411.12641" aria-labelledby="oth-2411.12641">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Improving Controllability and Editability for Pretrained Text-to-Music Generation Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yixiao Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> PhD Thesis </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> The field of AI-assisted music creation has made significant strides, yet existing systems often struggle to meet the demands of iterative and nuanced music production. These challenges include providing sufficient control over the generated content and allowing for flexible, precise edits. This thesis tackles these issues by introducing a series of advancements that progressively build upon each other, enhancing the controllability and editability of text-to-music generation models. <br>First, we introduce Loop Copilot, a system that tries to address the need for iterative refinement in music creation. Loop Copilot leverages a large language model (LLM) to coordinate multiple specialised AI models, enabling users to generate and refine music interactively through a conversational interface. Central to this system is the Global Attribute Table, which records and maintains key musical attributes throughout the iterative process, ensuring that modifications at any stage preserve the overall coherence of the music. 
While Loop Copilot excels in orchestrating the music creation process, it does not directly address the need for detailed edits to the generated content. <br>To overcome this limitation, MusicMagus is presented as a further solution for editing AI-generated music. MusicMagus introduces a zero-shot text-to-music editing approach that allows for the modification of specific musical attributes, such as genre, mood, and instrumentation, without the need for retraining. By manipulating the latent space within pre-trained diffusion models, MusicMagus ensures that these edits are stylistically coherent and that non-targeted attributes remain unchanged. This system is particularly effective in maintaining the structural integrity of the music during edits, but it encounters challenges with more complex and real-world audio scenarios. <br>... </p> </div> </dd> <dt> <a name='item626'>[626]</a> <a href ="/abs/2411.12700" title="Abstract" id="2411.12700"> arXiv:2411.12700 </a> (replaced) [<a href="/pdf/2411.12700" title="Download PDF" id="pdf-2411.12700" aria-labelledby="pdf-2411.12700">pdf</a>, <a href="/format/2411.12700" title="Other formats" id="oth-2411.12700" aria-labelledby="oth-2411.12700">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning multivariate Gaussians with imperfect advice </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bhattacharyya,+A">Arnab Bhattacharyya</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Choo,+D">Davin Choo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=John,+P+G">Philips George John</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gouleakis,+T">Themis Gouleakis</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Data Structures and Algorithms (cs.DS); Information Theory (cs.IT); 
Machine Learning (stat.ML) </div> <p class='mathjax'> We revisit the problem of distribution learning within the framework of learning-augmented algorithms. In this setting, we explore the scenario where a probability distribution is provided as potentially inaccurate advice on the true, unknown distribution. Our objective is to develop learning algorithms whose sample complexity decreases as the quality of the advice improves, thereby surpassing standard learning lower bounds when the advice is sufficiently accurate. <br>Specifically, we demonstrate that this outcome is achievable for the problem of learning a multivariate Gaussian distribution $N(\boldsymbol{\mu}, \boldsymbol{\Sigma})$ in the PAC learning setting. Classically, in the advice-free setting, $\tilde{\Theta}(d^2/\varepsilon^2)$ samples are sufficient and worst case necessary to learn $d$-dimensional Gaussians up to TV distance $\varepsilon$ with constant probability. When we are additionally given a parameter $\tilde{\boldsymbol{\Sigma}}$ as advice, we show that $\tilde{O}(d^{2-\beta}/\varepsilon^2)$ samples suffice whenever $\| \tilde{\boldsymbol{\Sigma}}^{-1/2} \boldsymbol{\Sigma} \tilde{\boldsymbol{\Sigma}}^{-1/2} - \boldsymbol{I_d} \|_1 \leq \varepsilon d^{1-\beta}$ (where $\|\cdot\|_1$ denotes the entrywise $\ell_1$ norm) for any $\beta > 0$, yielding a polynomial improvement over the advice-free setting. 
</p> </div> </dd> <dt> <a name='item627'>[627]</a> <a href ="/abs/2411.13009" title="Abstract" id="2411.13009"> arXiv:2411.13009 </a> (replaced) [<a href="/pdf/2411.13009" title="Download PDF" id="pdf-2411.13009" aria-labelledby="pdf-2411.13009">pdf</a>, <a href="https://arxiv.org/html/2411.13009v2" title="View HTML" id="html-2411.13009" aria-labelledby="html-2411.13009" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13009" title="Other formats" id="oth-2411.13009" aria-labelledby="oth-2411.13009">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> LLMSteer: Improving Long-Context LLM Inference by Steering Attention on Reused Contexts </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gu,+Z">Zhuohan Gu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yao,+J">Jiayi Yao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Du,+K">Kuntai Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+J">Junchen Jiang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> As large language models (LLMs) show impressive performance on complex tasks, they still struggle with longer contextual understanding and high computational costs. To balance efficiency and quality, we introduce LLMSteer, a fine-tuning-free framework that enhances LLMs through query-independent attention steering. Tested on popular LLMs and datasets, LLMSteer narrows the performance gap with baselines by 65.9% and reduces the runtime delay by up to 4.8x compared to recent attention steering methods. 
</p> </div> </dd> <dt> <a name='item628'>[628]</a> <a href ="/abs/2411.13187" title="Abstract" id="2411.13187"> arXiv:2411.13187 </a> (replaced) [<a href="/pdf/2411.13187" title="Download PDF" id="pdf-2411.13187" aria-labelledby="pdf-2411.13187">pdf</a>, <a href="https://arxiv.org/html/2411.13187v2" title="View HTML" id="html-2411.13187" aria-labelledby="html-2411.13187" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13187" title="Other formats" id="oth-2411.13187" aria-labelledby="oth-2411.13187">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Engagement-Driven Content Generation with Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Coppolillo,+E">Erica Coppolillo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cinus,+F">Federico Cinus</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Minici,+M">Marco Minici</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bonchi,+F">Francesco Bonchi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Manco,+G">Giuseppe Manco</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Large Language Models (LLMs) exhibit significant persuasion capabilities in one-on-one interactions, but their influence within social networks remains underexplored. This study investigates the potential social impact of LLMs in these environments, where interconnected users and complex opinion dynamics pose unique challenges. In particular, we address the following research question: can LLMs learn to generate meaningful content that maximizes user engagement on social networks? 
<br>To answer this question, we define a pipeline to guide the LLM-based content generation which employs reinforcement learning with simulated feedback. In our framework, the reward is based on an engagement model borrowed from the literature on opinion dynamics and information propagation. Moreover, we force the text generated by the LLM to be aligned with a given topic and to satisfy a minimum fluency requirement. <br>Using our framework, we analyze the capabilities and limitations of LLMs in tackling the given task, specifically considering the relative positions of the LLM as an agent within the social network and the distribution of opinions in the network on the given topic. Our findings show the full potential of LLMs in creating social engagement. Notable properties of our approach are that the learning procedure is adaptive to the opinion distribution of the underlying network and agnostic to the specifics of the engagement model, which is embedded as a plug-and-play component. In this regard, our approach can be easily refined for more complex engagement tasks and interventions in computational social science. <br>The code used for the experiments is publicly available at <a href="https://anonymous.4open.science/r/EDCG/" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item629'>[629]</a> <a href ="/abs/2411.13211" title="Abstract" id="2411.13211"> arXiv:2411.13211 </a> (replaced) [<a href="/pdf/2411.13211" title="Download PDF" id="pdf-2411.13211" aria-labelledby="pdf-2411.13211">pdf</a>, <a href="https://arxiv.org/html/2411.13211v2" title="View HTML" id="html-2411.13211" aria-labelledby="html-2411.13211" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13211" title="Other formats" id="oth-2411.13211" aria-labelledby="oth-2411.13211">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ViSTa Dataset: Do vision-language models understand sequential tasks? </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wybitul,+E">Evžen Wybitul</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gunter,+E+R">Evan Ryan Gunter</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Seleznyov,+M">Mikhail Seleznyov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lindner,+D">David Lindner</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Using vision-language models (VLMs) as reward models in reinforcement learning holds promise for reducing costs and improving safety. So far, VLM reward models have only been used for goal-oriented tasks, where the agent must reach a particular final outcome. We explore VLMs' potential to supervise tasks that cannot be scored by the final state alone. To this end, we introduce ViSTa, a dataset for evaluating Vision-based understanding of Sequential Tasks. ViSTa comprises over 4,000 videos with step-by-step descriptions in virtual home, Minecraft, and real-world environments. 
Its novel hierarchical structure -- basic single-step tasks composed into more and more complex sequential tasks -- allows a fine-grained understanding of how well VLMs can judge tasks with varying complexity. To illustrate this, we use ViSTa to evaluate state-of-the-art VLMs, including CLIP, ViCLIP, and GPT-4o. We find that, while they are all good at object recognition, they fail to understand sequential tasks, with only GPT-4o achieving non-trivial performance. </p> </div> </dd> <dt> <a name='item630'>[630]</a> <a href ="/abs/2411.13366" title="Abstract" id="2411.13366"> arXiv:2411.13366 </a> (replaced) [<a href="/pdf/2411.13366" title="Download PDF" id="pdf-2411.13366" aria-labelledby="pdf-2411.13366">pdf</a>, <a href="https://arxiv.org/html/2411.13366v2" title="View HTML" id="html-2411.13366" aria-labelledby="html-2411.13366" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13366" title="Other formats" id="oth-2411.13366" aria-labelledby="oth-2411.13366">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Predicting Wall Thickness Changes in Cold Forging Processes: An Integrated FEM and Neural Network approach </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ilic,+S">Sasa Ilic</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Karaman,+A">Abdulkerim Karaman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=P%C3%B6ppelbaum,+J">Johannes Pöppelbaum</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Reimann,+J+N">Jan Niclas Reimann</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Marr%C3%A9,+M">Michael Marré</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schwung,+A">Andreas Schwung</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> This 
study presents a novel approach for predicting wall thickness changes in tubes during the nosing process. Specifically, we first provide a thorough analysis of nosing processes and the influencing parameters. We further set-up a Finite Element Method (FEM) simulation to better analyse the effects of varying process parameters. As however traditional FEM simulations, while accurate, are time-consuming and computationally intensive, which renders them inapplicable for real-time application, we present a novel modeling framework based on specifically designed graph neural networks as surrogate models. To this end, we extend the neural network architecture by directly incorporating information about the nosing process by adding different types of edges and their corresponding encoders to model object interactions. This augmentation enhances model accuracy and opens the possibility for employing precise surrogate models within closed-loop production processes. The proposed approach is evaluated using a new evaluation metric termed area between thickness curves (ABTC). The results demonstrate promising performance and highlight the potential of neural networks as surrogate models in predicting wall thickness changes during nosing forging processes. 
</p> </div> </dd> <dt> <a name='item631'>[631]</a> <a href ="/abs/2411.13407" title="Abstract" id="2411.13407"> arXiv:2411.13407 </a> (replaced) [<a href="/pdf/2411.13407" title="Download PDF" id="pdf-2411.13407" aria-labelledby="pdf-2411.13407">pdf</a>, <a href="https://arxiv.org/html/2411.13407v2" title="View HTML" id="html-2411.13407" aria-labelledby="html-2411.13407" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13407" title="Other formats" id="oth-2411.13407" aria-labelledby="oth-2411.13407">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Transformer-Based Contextualized Language Models Joint with Neural Networks for Natural Language Inference in Vietnamese </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+D+V">Dat Van-Thanh Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Van+Huynh,+T">Tin Van Huynh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Van+Nguyen,+K">Kiet Van Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+N+L">Ngan Luu-Thuy Nguyen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> Natural Language Inference (NLI) is a task within Natural Language Processing (NLP) that holds value for various AI applications. However, there have been limited studies on Natural Language Inference in Vietnamese that explore the concept of joint models. Therefore, we conducted experiments using various combinations of contextualized language models (CLM) and neural networks. We use CLM to create contextualized work presentations and use Neural Networks for classification. Furthermore, we have evaluated the strengths and weaknesses of each joint model and identified the model failure points in the Vietnamese context. 
The highest F1 score in this experiment, up to 82.78% in the benchmark dataset (ViNLI). By conducting experiments with various models, the most considerable size of the CLM is XLM-R (355M). That combination has consistently demonstrated superior performance compared to fine-tuning strong pre-trained language models like PhoBERT (+6.58%), mBERT (+19.08%), and XLM-R (+0.94%) in terms of F1-score. This article aims to introduce a novel approach or model that attains improved performance for Vietnamese NLI. Overall, we find that the joint approach of CLM and neural networks is simple yet capable of achieving high-quality performance, which makes it suitable for applications that require efficient resource utilization. </p> </div> </dd> <dt> <a name='item632'>[632]</a> <a href ="/abs/2411.13504" title="Abstract" id="2411.13504"> arXiv:2411.13504 </a> (replaced) [<a href="/pdf/2411.13504" title="Download PDF" id="pdf-2411.13504" aria-labelledby="pdf-2411.13504">pdf</a>, <a href="https://arxiv.org/html/2411.13504v2" title="View HTML" id="html-2411.13504" aria-labelledby="html-2411.13504" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13504" title="Other formats" id="oth-2411.13504" aria-labelledby="oth-2411.13504">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Disentangling Memory and Reasoning Ability in Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+M">Mingyu Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+W">Weidi Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+S">Sitao Cheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+X">Xinyi Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hua,+W">Wenyue Hua</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+R">Ruixiang Tang</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+W+Y">William Yang Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yongfeng Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span> </div> <p class='mathjax'> Large Language Models (LLMs) have demonstrated strong performance in handling complex tasks requiring both extensive knowledge and reasoning abilities. However, the existing LLM inference pipeline operates as an opaque process without explicit separation between knowledge retrieval and reasoning steps, making the model's decision-making process unclear and disorganized. This ambiguity can lead to issues such as hallucinations and knowledge forgetting, which significantly impact the reliability of LLMs in high-stakes domains. In this paper, we propose a new inference paradigm that decomposes the complex inference process into two distinct and clear actions: (1) memory recall: which retrieves relevant knowledge, and (2) reasoning: which performs logical steps based on the recalled knowledge. To facilitate this decomposition, we introduce two special tokens memory and reason, guiding the model to distinguish between steps that require knowledge retrieval and those that involve reasoning. Our experiment results show that this decomposition not only improves model performance but also enhances the interpretability of the inference process, enabling users to identify sources of error and refine model responses effectively. The code is available at <a href="https://github.com/MingyuJ666/Disentangling-Memory-and-Reasoning" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item633'>[633]</a> <a href ="/abs/2411.13525" title="Abstract" id="2411.13525"> arXiv:2411.13525 </a> (replaced) [<a href="/pdf/2411.13525" title="Download PDF" id="pdf-2411.13525" aria-labelledby="pdf-2411.13525">pdf</a>, <a href="https://arxiv.org/html/2411.13525v2" title="View HTML" id="html-2411.13525" aria-labelledby="html-2411.13525" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13525" title="Other formats" id="oth-2411.13525" aria-labelledby="oth-2411.13525">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Geometric Algebra Planes: Convex Implicit Neural Volumes </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sivgin,+I">Irmak Sivgin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fridovich-Keil,+S">Sara Fridovich-Keil</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wetzstein,+G">Gordon Wetzstein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pilanci,+M">Mert Pilanci</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Code is available at <a href="https://github.com/sivginirmak/Geometric-Algebra-Planes" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Volume parameterizations abound in recent literature, from the classic voxel grid to the implicit neural representation and everything in between. While implicit representations have shown impressive capacity and better memory efficiency compared to voxel grids, to date they require training via nonconvex optimization. 
This nonconvex training process can be slow to converge and sensitive to initialization and hyperparameter choices that affect the final converged result. We introduce a family of models, GA-Planes, that is the first class of implicit neural volume representations that can be trained by convex optimization. GA-Planes models include any combination of features stored in tensor basis elements, followed by a neural feature decoder. They generalize many existing representations and can be adapted for convex, semiconvex, or nonconvex training as needed for different inverse problems. In the 2D setting, we prove that GA-Planes is equivalent to a low-rank plus low-resolution matrix factorization; we show that this approximation outperforms the classic low-rank plus sparse decomposition for fitting a natural image. In 3D, we demonstrate GA-Planes' competitive performance in terms of expressiveness, model size, and optimizability across three volume fitting tasks: radiance field reconstruction, 3D segmentation, and video segmentation. 
</p> </div> </dd> <dt> <a name='item634'>[634]</a> <a href ="/abs/2411.13545" title="Abstract" id="2411.13545"> arXiv:2411.13545 </a> (replaced) [<a href="/pdf/2411.13545" title="Download PDF" id="pdf-2411.13545" aria-labelledby="pdf-2411.13545">pdf</a>, <a href="https://arxiv.org/html/2411.13545v2" title="View HTML" id="html-2411.13545" aria-labelledby="html-2411.13545" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13545" title="Other formats" id="oth-2411.13545" aria-labelledby="oth-2411.13545">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Pushing the Limits of Sparsity: A Bag of Tricks for Extreme Pruning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+A">Andy Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Durrant,+A">Aiden Durrant</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Markovic,+M">Milan Markovic</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+L">Lu Yin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Leontidis,+G">Georgios Leontidis</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> V2: same as V1 but with appendix/preliminaries; 12 pages, 5 figures, 4 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span> </div> <p class='mathjax'> Pruning of deep neural networks has been an effective technique for reducing model size while preserving most of the performance of dense networks, crucial for deploying models on memory and power-constrained devices. While recent sparse learning methods have shown promising performance up to moderate sparsity levels such as 95% and 98%, accuracy quickly deteriorates when pushing sparsities to extreme levels. 
Obtaining sparse networks at such extreme sparsity levels presents unique challenges, such as fragile gradient flow and heightened risk of layer collapse. In this work, we explore network performance beyond the commonly studied sparsities, and propose a collection of techniques that enable the continuous learning of networks without accuracy collapse even at extreme sparsities, including 99.90%, 99.95% and 99.99% on ResNet architectures. Our approach combines 1) Dynamic ReLU phasing, where DyReLU initially allows for richer parameter exploration before being gradually replaced by standard ReLU, 2) weight sharing which reuses parameters within a residual layer while maintaining the same number of learnable parameters, and 3) cyclic sparsity, where both sparsity levels and sparsity patterns evolve dynamically throughout training to better encourage parameter exploration. We evaluate our method, which we term Extreme Adaptive Sparse Training (EAST) at extreme sparsities using ResNet-34 and ResNet-50 on CIFAR-10, CIFAR-100, and ImageNet, achieving significant performance improvements over state-of-the-art methods we compared with. 
</p> </div> </dd> <dt> <a name='item635'>[635]</a> <a href ="/abs/2207.13021" title="Abstract" id="2207.13021"> arXiv:2207.13021 </a> (replaced) [<a href="/pdf/2207.13021" title="Download PDF" id="pdf-2207.13021" aria-labelledby="pdf-2207.13021">pdf</a>, <a href="/format/2207.13021" title="Other formats" id="oth-2207.13021" aria-labelledby="oth-2207.13021">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CTVR-EHO TDA-IPH Topological Optimized Convolutional Visual Recurrent Network for Brain Tumor Segmentation and Classification </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Joshi,+D">Dhananjay Joshi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Singh,+B+K">Bhupesh Kumar Singh</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Nagwanshi,+K+K">Kapil Kumar Nagwanshi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Choubey,+N+S">Nitin S. Choubey</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> In today's world of health care, brain tumor detection has become common. However, the manual brain tumor classification approach is time-consuming. So Deep Convolutional Neural Network (DCNN) is used by many researchers in the medical field for making accurate diagnoses and aiding in the patient's treatment. The traditional techniques have problems such as overfitting and the inability to extract necessary features. To overcome these problems, we developed the Topological Data Analysis based Improved Persistent Homology (TDA-IPH) and Convolutional Transfer learning and Visual Recurrent learning with Elephant Herding Optimization hyper-parameter tuning (CTVR-EHO) models for brain tumor segmentation and classification. 
Initially, the Topological Data Analysis based Improved Persistent Homology is designed to segment the brain tumor image. Then, from the segmented image, features are extracted using TL via the AlexNet model and Bidirectional Visual Long Short-Term Memory (Bi-VLSTM). Next, elephant Herding Optimization (EHO) is used to tune the hyperparameters of both networks to get an optimal result. Finally, extracted features are concatenated and classified using the softmax activation layer. The simulation result of this proposed CTVR-EHO and TDA-IPH method is analyzed based on precision, accuracy, recall, loss, and F score metrics. When compared to other existing brain tumor segmentation and classification models, the proposed CTVR-EHO and TDA-IPH approaches show high accuracy (99.8%), high recall (99.23%), high precision (99.67%), and high F score (99.59%). </p> </div> </dd> <dt> <a name='item636'>[636]</a> <a href ="/abs/2210.08624" title="Abstract" id="2210.08624"> arXiv:2210.08624 </a> (replaced) [<a href="/pdf/2210.08624" title="Download PDF" id="pdf-2210.08624" aria-labelledby="pdf-2210.08624">pdf</a>, <a href="https://arxiv.org/html/2210.08624v2" title="View HTML" id="html-2210.08624" aria-labelledby="html-2210.08624" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2210.08624" title="Other formats" id="oth-2210.08624" aria-labelledby="oth-2210.08624">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Attention-Based Audio Embeddings for Query-by-Example </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Singh,+A">Anup Singh</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Demuynck,+K">Kris Demuynck</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Arora,+V">Vipul Arora</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted in ISMIR 2022 </div> <div 
class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Audio and Speech Processing (eess.AS)</span>; Sound (cs.SD) </div> <p class='mathjax'> An ideal audio retrieval system efficiently and robustly recognizes a short query snippet from an extensive database. However, the performance of well-known audio fingerprinting systems falls short at high signal distortion levels. This paper presents an audio retrieval system that generates noise and reverberation robust audio fingerprints using the contrastive learning framework. Using these fingerprints, the method performs a comprehensive search to identify the query audio and precisely estimate its timestamp in the reference audio. Our framework involves training a CNN to maximize the similarity between pairs of embeddings extracted from clean audio and its corresponding distorted and time-shifted version. We employ a channel-wise spectral-temporal attention mechanism to better discriminate the audio by giving more weight to the salient spectral-temporal patches in the signal. Experimental results indicate that our system is efficient in computation and memory usage while being more accurate, particularly at higher distortion levels, than competing state-of-the-art systems and scalable to a larger database. 
</p> </div> </dd> <dt> <a name='item637'>[637]</a> <a href ="/abs/2212.02444" title="Abstract" id="2212.02444"> arXiv:2212.02444 </a> (replaced) [<a href="/pdf/2212.02444" title="Download PDF" id="pdf-2212.02444" aria-labelledby="pdf-2212.02444">pdf</a>, <a href="https://arxiv.org/html/2212.02444v3" title="View HTML" id="html-2212.02444" aria-labelledby="html-2212.02444" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2212.02444" title="Other formats" id="oth-2212.02444" aria-labelledby="oth-2212.02444">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Homotopy type theory as a language for diagrams of $\infty$-logoses </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Uemura,+T">Taichi Uemura</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Category Theory (math.CT)</span>; Logic in Computer Science (cs.LO); Logic (math.LO) </div> <p class='mathjax'> We show that certain diagrams of $\infty$-logoses are reconstructed in homotopy type theory extended with some lex, accessible modalities, which enables us to use plain homotopy type theory to reason about not only a single $\infty$-logos but also a diagram of $\infty$-logoses. This also provides a higher dimensional version of Sterling's synthetic Tait computability -- a type theory for higher dimensional logical relations. 
</p> </div> </dd> <dt> <a name='item638'>[638]</a> <a href ="/abs/2302.09682" title="Abstract" id="2302.09682"> arXiv:2302.09682 </a> (replaced) [<a href="/pdf/2302.09682" title="Download PDF" id="pdf-2302.09682" aria-labelledby="pdf-2302.09682">pdf</a>, <a href="https://arxiv.org/html/2302.09682v2" title="View HTML" id="html-2302.09682" aria-labelledby="html-2302.09682" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2302.09682" title="Other formats" id="oth-2302.09682" aria-labelledby="oth-2302.09682">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Dual Attention Model with Reinforcement Learning for Classification of Histology Whole-Slide Images </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Raza,+M">Manahil Raza</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Awan,+R">Ruqayya Awan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Bashir,+R+M+S">Raja Muhammad Saad Bashir</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Qaiser,+T">Talha Qaiser</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Rajpoot,+N+M">Nasir M. Rajpoot</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Digital whole slide images (WSIs) are generally captured at microscopic resolution and encompass extensive spatial data. Directly feeding these images to deep learning models is computationally intractable due to memory constraints, while downsampling the WSIs risks incurring information loss. Alternatively, splitting the WSIs into smaller patches may result in a loss of important contextual information. 
In this paper, we propose a novel dual attention approach, consisting of two main components, both inspired by the visual examination process of a pathologist: The first soft attention model processes a low magnification view of the WSI to identify relevant regions of interest, followed by a custom sampling method to extract diverse and spatially distinct image tiles from the selected ROIs. The second component, the hard attention classification model further extracts a sequence of multi-resolution glimpses from each tile for classification. Since hard attention is non-differentiable, we train this component using reinforcement learning to predict the location of the glimpses. This approach allows the model to focus on essential regions instead of processing the entire tile, thereby aligning with a pathologist's way of diagnosis. The two components are trained in an end-to-end fashion using a joint loss function to demonstrate the efficacy of the model. The proposed model was evaluated on two WSI-level classification problems: Human epidermal growth factor receptor 2 scoring on breast cancer histology images and prediction of Intact/Loss status of two Mismatch Repair biomarkers from colorectal cancer histology images. We show that the proposed model achieves performance better than or comparable to the state-of-the-art methods while processing less than 10% of the WSI at the highest magnification and reducing the time required to infer the WSI-level label by more than 75%. 
</p> </div> </dd> <dt> <a name='item639'>[639]</a> <a href ="/abs/2312.10495" title="Abstract" id="2312.10495"> arXiv:2312.10495 </a> (replaced) [<a href="/pdf/2312.10495" title="Download PDF" id="pdf-2312.10495" aria-labelledby="pdf-2312.10495">pdf</a>, <a href="https://arxiv.org/html/2312.10495v2" title="View HTML" id="html-2312.10495" aria-labelledby="html-2312.10495" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2312.10495" title="Other formats" id="oth-2312.10495" aria-labelledby="oth-2312.10495">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Computing Optimal Joint Chance Constrained Control Policies </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Schmid,+N">Niklas Schmid</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Fochesato,+M">Marta Fochesato</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Li,+S+H">Sarah H.Q. Li</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Sutter,+T">Tobias Sutter</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Lygeros,+J">John Lygeros</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optimization and Control (math.OC)</span>; Systems and Control (eess.SY) </div> <p class='mathjax'> We consider the problem of optimally controlling stochastic, Markovian systems subject to joint chance constraints over a finite-time horizon. For such problems, standard Dynamic Programming is inapplicable due to the time correlation of the joint chance constraints, which calls for non-Markovian, and possibly stochastic, policies. Hence, despite the popularity of this problem, solution approaches capable of providing provably-optimal and easy-to-compute policies are still missing. 
We fill this gap by augmenting the dynamics via a binary state, allowing us to characterize the optimal policies and develop a Dynamic Programming based solution method. </p> </div> </dd> <dt> <a name='item640'>[640]</a> <a href ="/abs/2401.08758" title="Abstract" id="2401.08758"> arXiv:2401.08758 </a> (replaced) [<a href="/pdf/2401.08758" title="Download PDF" id="pdf-2401.08758" aria-labelledby="pdf-2401.08758">pdf</a>, <a href="/format/2401.08758" title="Other formats" id="oth-2401.08758" aria-labelledby="oth-2401.08758">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DiscoTEX 1.0: Discontinuous collocation and implicit-turned-explicit (IMTEX) integration symplectic, symmetric numerical algorithms with higher order jumps for differential equations I: numerical black hole perturbation theory applications </div> <div class='list-authors'><a href="https://arxiv.org/search/gr-qc?searchtype=author&query=Da+Silva,+L+J+G">Lidia J. Gomes Da Silva</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 50 pages, 19 figures, 9 tables. Several typos corrected, higher-order results for the computation of energy and angular moment fluxes added. Includes overview of previous numerical methods implemented in the time-domain for the modelling of asymmetric mass ratio inspirals with suitability checks on Table 9. Now first of a series of papers. Comments are welcome </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">General Relativity and Quantum Cosmology (gr-qc)</span>; High Energy Astrophysical Phenomena (astro-ph.HE); Numerical Analysis (math.NA); Computational Physics (physics.comp-ph) </div> <p class='mathjax'> Dirac $\delta-$ distributionally sourced differential equations emerge in many dynamical physical systems from machine learning, finance, neuroscience, and seismology to black hole perturbation theory. 
These systems lack exact analytical solutions and are thus best tackled numerically. We describe a generic numerical algorithm which constructs discontinuous spatial and temporal discretisations by operating on discontinuous Lagrange and Hermite interpolation formulae, respectively. By solving the distributionally sourced wave equation, possessing analytical solutions, we demonstrate that numerical weak-form solutions can be recovered to high-order accuracy by solving a first-order reduced system of ODEs. The method-of-lines framework is applied to the \texttt{DiscoTEX} algorithm i.e. through \underline{dis}continuous \underline{co}llocation with implicit\underline{-turned-explicit} integration methods which are symmetric and conserve symplectic structure. Furthermore, the main application of the algorithm is proved by calculating the amplitude at any desired location within the numerical grid, including at the position (and at its right and left limit) where the wave- (or wave-like) equation is discontinuous via interpolation using \texttt{DiscoTEX}. This is demonstrated, firstly by solving the wave- (or wave-like) equation and comparing the numerical weak-form solution to the exact solution. We further demonstrate how to reconstruct the gravitational metric perturbations from weak-form numerical solutions of a non-rotating black hole, which do not have known exact analytical solutions, and compare them against state-of-the-art frequency domain results. We conclude by motivating how \texttt{DiscoTEX}, and related numerical algorithms, both open a promising new alternative waveform generation route for modelling highly asymmetric binaries and complement current frequency domain methods. 
</p> </div> </dd> <dt> <a name='item641'>[641]</a> <a href ="/abs/2404.10351" title="Abstract" id="2404.10351"> arXiv:2404.10351 </a> (replaced) [<a href="/pdf/2404.10351" title="Download PDF" id="pdf-2404.10351" aria-labelledby="pdf-2404.10351">pdf</a>, <a href="/format/2404.10351" title="Other formats" id="oth-2404.10351" aria-labelledby="oth-2404.10351">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On the Use of Relative Validity Indices for Comparing Clustering Approaches </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Yerbury,+L+W">Luke W. Yerbury</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Campello,+R+J+G+B">Ricardo J. G. B. Campello</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Livingston,+G+C">G. C. Livingston Jr</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Goldsworthy,+M">Mark Goldsworthy</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=O'Neil,+L">Lachlan O'Neil</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Relative Validity Indices (RVIs) such as the Silhouette Width Criterion and Davies Bouldin indices are the most widely used tools for evaluating and optimising clustering outcomes. Traditionally, their ability to rank collections of candidate dataset partitions has been used to guide the selection of the number of clusters, and to compare partitions from different clustering algorithms. However, there is a growing trend in the literature to use RVIs when selecting a Similarity Paradigm (SP) for clustering - the combination of normalisation procedure, representation method, and distance measure which affects the computation of object dissimilarities used in clustering. 
Despite the growing prevalence of this practice, there has been no empirical or theoretical investigation into the suitability of RVIs for this purpose. Moreover, since RVIs are computed using object dissimilarities, it remains unclear how they would need to be implemented for fair comparisons of different SPs. This study presents the first comprehensive investigation into the reliability of RVIs for SP selection. We conducted extensive experiments with seven popular RVIs on over 2.7 million clustering partitions of synthetic and real-world datasets, encompassing feature-vector and time-series data. We identified fundamental conceptual limitations undermining the use of RVIs for SP selection, and our empirical findings confirmed this predicted unsuitability. Among our recommendations, we suggest instead that practitioners select SPs by using external validation on high quality labelled datasets or carefully designed outcome-oriented objective criteria, both of which should be informed by careful consideration of dataset characteristics, and domain requirements. Our findings have important implications for clustering methodology and evaluation, suggesting the need for more rigorous approaches to SP selection. 
</p> </div> </dd> <dt> <a name='item642'>[642]</a> <a href ="/abs/2405.00663" title="Abstract" id="2405.00663"> arXiv:2405.00663 </a> (replaced) [<a href="/pdf/2405.00663" title="Download PDF" id="pdf-2405.00663" aria-labelledby="pdf-2405.00663">pdf</a>, <a href="https://arxiv.org/html/2405.00663v3" title="View HTML" id="html-2405.00663" aria-labelledby="html-2405.00663" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.00663" title="Other formats" id="oth-2405.00663" aria-labelledby="oth-2405.00663">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Quantum cryptographic protocols with dual messaging system via 2D alternate quantum walk of a genuine single-photon entangled state </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Panda,+D+K">Dinesh Kumar Panda</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Benjamin,+C">Colin Benjamin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 13 pages (including appendix), two figures and one table, accepted for publication in Journal of Physics A: Mathematical and Theoretical as a letter </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Journal of Physics A: Mathematical and Theoretical (2024) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Disordered Systems and Neural Networks (cond-mat.dis-nn); Cryptography and Security (cs.CR); Quantum Algebra (math.QA); Optics (physics.optics) </div> <p class='mathjax'> A single-photon entangled state (or single-particle entangled state (SPES) in general) can offer a more secure way of encoding and processing quantum information than their multi-photon (or multi-particle) counterparts. 
The SPES generated via a 2D alternate quantum-walk setup from initially separable states can be either 3-way or 2-way entangled. This letter shows that the generated genuine three-way and nonlocal two-way SPES can be used as cryptographic keys to securely encode two distinct messages simultaneously. We detail the message encryption-decryption steps and show the resilience of the 3-way and 2-way SPES-based cryptographic protocols against eavesdropper attacks like intercept-and-resend and man-in-the-middle. We also detail the experimental realization of these protocols using a single photon, with the three degrees of freedom being OAM, path, and polarization. We have proved that the protocols have unconditional security for quantum communication tasks. The ability to simultaneously encode two distinct messages using the generated SPES showcases the versatility and efficiency of the proposed cryptographic protocol. This capability could significantly improve the throughput of quantum communication systems. 
</p> </div> </dd> <dt> <a name='item643'>[643]</a> <a href ="/abs/2405.01715" title="Abstract" id="2405.01715"> arXiv:2405.01715 </a> (replaced) [<a href="/pdf/2405.01715" title="Download PDF" id="pdf-2405.01715" aria-labelledby="pdf-2405.01715">pdf</a>, <a href="https://arxiv.org/html/2405.01715v2" title="View HTML" id="html-2405.01715" aria-labelledby="html-2405.01715" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.01715" title="Other formats" id="oth-2405.01715" aria-labelledby="oth-2405.01715">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> GRAMEP: an alignment-free method based on the Maximum Entropy Principle for identifying SNPs </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Pimenta-Zanon,+M+H">Matheus Henrique Pimenta-Zanon</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Kashiwabara,+A+Y">André Yoshiaki Kashiwabara</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Vanzela,+A+L+L">André Luís Laforga Vanzela</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Lopes,+F+M">Fabricio Martins Lopes</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Genomics (q-bio.GN)</span>; Information Theory (cs.IT); Applications (stat.AP) </div> <p class='mathjax'> Background: Advances in high throughput sequencing technologies provide a huge number of genomes to be analyzed. Thus, computational methods play a crucial role in analyzing and extracting knowledge from the data generated. Investigating genomic mutations is critical because of their impact on chromosomal evolution, genetic disorders, and diseases. It is common to adopt aligning sequences for analyzing genomic variations. However, this approach can be computationally expensive and restrictive in scenarios with large datasets. 
Results: We present a novel method for identifying single nucleotide polymorphisms (SNPs) in DNA sequences from assembled genomes. This study proposes GRAMEP, an alignment-free approach that adopts the principle of maximum entropy to discover the most informative k-mers specific to a genome or set of sequences under investigation. The informative k-mers enable the detection of variant-specific mutations in comparison to a reference genome or other set of sequences. In addition, our method offers the possibility of classifying novel sequences with no need for organism-specific information. GRAMEP demonstrated high accuracy in both in silico simulations and analyses of viral genomes, including Dengue, HIV, and SARS-CoV-2. Our approach maintained accurate SARS-CoV-2 variant identification while demonstrating a lower computational cost compared to methods with the same purpose. Conclusions: GRAMEP is an open and user-friendly software based on maximum entropy that provides an efficient alignment-free approach to identifying and classifying unique genomic subsequences and SNPs with high accuracy, offering advantages over comparative methods. 
The instructions for use, applicability, and usability of GRAMEP are open access at <a href="https://github.com/omatheuspimenta/GRAMEP" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item644'>[644]</a> <a href ="/abs/2405.07245" title="Abstract" id="2405.07245"> arXiv:2405.07245 </a> (replaced) [<a href="/pdf/2405.07245" title="Download PDF" id="pdf-2405.07245" aria-labelledby="pdf-2405.07245">pdf</a>, <a href="https://arxiv.org/html/2405.07245v2" title="View HTML" id="html-2405.07245" aria-labelledby="html-2405.07245" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.07245" title="Other formats" id="oth-2405.07245" aria-labelledby="oth-2405.07245">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Ecology, Spatial Structure, and Selection Pressure Induce Strong Signatures in Phylogenetic Structure </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Moreno,+M+A">Matthew Andres Moreno</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Rodriguez-Papa,+S">Santiago Rodriguez-Papa</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Dolson,+E">Emily Dolson</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Populations and Evolution (q-bio.PE)</span>; Neural and Evolutionary Computing (cs.NE) </div> <p class='mathjax'> Evolutionary dynamics are shaped by a variety of fundamental, generic drivers, including spatial structure, ecology, and selection pressure. These drivers impact the trajectory of evolution, and have been hypothesized to influence phylogenetic structure. 
Here, we set out to assess (1) if spatial structure, ecology, and selection pressure leave detectable signatures in phylogenetic structure, (2) the extent, in particular, to which ecology can be detected and discerned in the presence of spatial structure, and (3) the extent to which these phylogenetic signatures generalize across evolutionary systems. To this end, we analyze phylogenies generated by manipulating spatial structure, ecology, and selection pressure within three computational models of varied scope and sophistication. We find that selection pressure, spatial structure, and ecology have characteristic effects on phylogenetic metrics, although these effects are complex and not always intuitive. Signatures have some consistency across systems when using equivalent taxonomic unit definitions (e.g., individual, genotype, species). Further, we find that sufficiently strong ecology can be detected in the presence of spatial structure. We also find that, while low-resolution phylogenetic reconstructions can bias some phylogenetic metrics, high-resolution reconstructions recapitulate them faithfully. Although our results suggest potential for evolutionary inference of spatial structure, ecology, and selection pressure through phylogenetic analysis, further methods development is needed to distinguish these drivers' phylometric signatures from each other and to appropriately normalize phylogenetic metrics. With such work, phylogenetic analysis could provide a versatile toolkit to study large-scale evolving populations. 
</p> </div> </dd> <dt> <a name='item645'>[645]</a> <a href ="/abs/2405.09162" title="Abstract" id="2405.09162"> arXiv:2405.09162 </a> (replaced) [<a href="/pdf/2405.09162" title="Download PDF" id="pdf-2405.09162" aria-labelledby="pdf-2405.09162">pdf</a>, <a href="https://arxiv.org/html/2405.09162v2" title="View HTML" id="html-2405.09162" aria-labelledby="html-2405.09162" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.09162" title="Other formats" id="oth-2405.09162" aria-labelledby="oth-2405.09162">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Complete and Terminating Tableau Calculus for Undirected Graph </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Nishimura,+Y">Yuki Nishimura</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Takagi,+T">Tsubasa Takagi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 19 pages, 6 figures, accepted as the conference 'AWPL 2024' post-proceeding </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Logic (math.LO)</span>; Logic in Computer Science (cs.LO) </div> <p class='mathjax'> Hybrid logic is a modal logic with additional operators specifying nominals and is highly expressive. For example, there is no formula corresponding to the irreflexivity of Kripke frames in basic modal logic, but there is in hybrid logic. Irreflexivity is significant in that irreflexive and symmetric Kripke frames can be regarded as undirected graphs reviewed from a graph theoretic point of view. Thus, the study of the hybrid logic with axioms corresponding to irreflexivity and symmetry can help to elucidate the logical properties of undirected graphs. In this paper, we formulate the tableau method of the hybrid logic for undirected graphs. 
Our main result is to show the completeness theorem and the termination property of the tableau method, which leads us to prove the decidability. </p> </div> </dd> <dt> <a name='item646'>[646]</a> <a href ="/abs/2405.17141" title="Abstract" id="2405.17141"> arXiv:2405.17141 </a> (replaced) [<a href="/pdf/2405.17141" title="Download PDF" id="pdf-2405.17141" aria-labelledby="pdf-2405.17141">pdf</a>, <a href="https://arxiv.org/html/2405.17141v2" title="View HTML" id="html-2405.17141" aria-labelledby="html-2405.17141" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.17141" title="Other formats" id="oth-2405.17141" aria-labelledby="oth-2405.17141">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MVMS-RCN: A Dual-Domain Unfolding CT Reconstruction with Multi-sparse-view and Multi-scale Refinement-correction </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Fan,+X">Xiaohong Fan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+K">Ke Chen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yi,+H">Huaming Yi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yang,+Y">Yin Yang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+J">Jianping Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 14 pages, Accepted to IEEE Transactions on Computational Imaging, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> X-ray Computed Tomography (CT) is one of the most important diagnostic imaging techniques in clinical applications. 
Sparse-view CT imaging reduces the number of projection views to a lower radiation dose and alleviates the potential risk of radiation exposure. Most existing deep learning (DL) and deep unfolding sparse-view CT reconstruction methods: 1) do not fully use the projection data; 2) do not always link their architecture designs to a mathematical theory; 3) do not flexibly deal with multi-sparse-view reconstruction assignments. This paper aims to use mathematical ideas and design optimal DL imaging algorithms for sparse-view tomography reconstructions. We propose a novel dual-domain deep unfolding unified framework that offers a great deal of flexibility for multi-sparse-view CT reconstruction with different sampling views through a single model. This framework combines the theoretical advantages of model-based methods with the superior reconstruction performance of DL-based methods, resulting in the expected generalizability of DL. We propose a refinement module that utilizes unfolding projection domain to refine full-sparse-view projection errors, as well as an image domain correction module that distills multi-scale geometric error corrections to reconstruct sparse-view CT. This provides us with a new way to explore the potential of projection information and a new perspective on designing network architectures. All parameters of our proposed framework are learnable end to end, and our method possesses the potential to be applied to plug-and-play reconstruction. Extensive experiments demonstrate that our framework is superior to other existing state-of-the-art methods. Our source codes are available at <a href="https://github.com/fanxiaohong/MVMS-RCN" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item647'>[647]</a> <a href ="/abs/2405.19347" title="Abstract" id="2405.19347"> arXiv:2405.19347 </a> (replaced) [<a href="/pdf/2405.19347" title="Download PDF" id="pdf-2405.19347" aria-labelledby="pdf-2405.19347">pdf</a>, <a href="https://arxiv.org/html/2405.19347v2" title="View HTML" id="html-2405.19347" aria-labelledby="html-2405.19347" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.19347" title="Other formats" id="oth-2405.19347" aria-labelledby="oth-2405.19347">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Near-Field Spot Beamfocusing: A Correlation-Aware Transfer Learning Approach </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Fallah,+M+A">Mohammad Amir Fallah</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Monemi,+M">Mehdi Monemi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Rasti,+M">Mehdi Rasti</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Latva-Aho,+M">Matti Latva-Aho</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> 3D spot beamfocusing (SBF), in contrast to conventional angular-domain beamforming, concentrates radiating power within a very small volume in both radial and angular domains in the near-field zone. Recently the implementation of channel-state-information (CSI)-independent machine learning (ML)-based approaches have been developed for effective SBF using extremely-large-scale-programmable-metasurface (ELPMs). These methods involve dividing the ELPMs into subarrays and independently training them with Deep Reinforcement Learning to jointly focus the beam at the Desired Focal Point (DFP). 
This paper explores near-field SBF using ELPMs, addressing challenges associated with lengthy training times resulting from independent training of subarrays. To achieve a faster CSI-independent solution, inspired by the correlation between the beamfocusing matrices of the subarrays, we leverage transfer learning techniques. First, we introduce a novel similarity criterion based on the Phase Distribution Image of subarray apertures. Then we devise a subarray policy propagation scheme that transfers the knowledge from trained to untrained subarrays. We further enhance learning by introducing Quasi-Liquid-Layers as a revised version of the adaptive policy reuse technique. We show through simulations that the proposed scheme improves the training speed about 5 times. Furthermore, for dynamic DFP management, we devised a DFP policy blending process, which augments the convergence rate up to 8-fold. </p> </div> </dd> <dt> <a name='item648'>[648]</a> <a href ="/abs/2406.03896" title="Abstract" id="2406.03896"> arXiv:2406.03896 </a> (replaced) [<a href="/pdf/2406.03896" title="Download PDF" id="pdf-2406.03896" aria-labelledby="pdf-2406.03896">pdf</a>, <a href="https://arxiv.org/html/2406.03896v2" title="View HTML" id="html-2406.03896" aria-labelledby="html-2406.03896" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.03896" title="Other formats" id="oth-2406.03896" aria-labelledby="oth-2406.03896">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Data-driven discovery of self-similarity using neural networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Watanabe,+R">Ryota Watanabe</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Ishii,+T">Takanori Ishii</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Hirono,+Y">Yuji Hirono</a>, <a 
href="https://arxiv.org/search/cond-mat?searchtype=author&query=Maruoka,+H">Hirokazu Maruoka</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 21 pages, 18 figures, 5 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Soft Condensed Matter (cond-mat.soft)</span>; Statistical Mechanics (cond-mat.stat-mech); Machine Learning (cs.LG) </div> <p class='mathjax'> Finding self-similarity is a key step for understanding the governing law behind complex physical phenomena. Traditional methods for identifying self-similarity often rely on specific models, which can introduce significant bias. In this paper, we present a novel neural network-based approach that discovers self-similarity directly from observed data, without presupposing any models. The presence of self-similar solutions in a physical problem signals that the governing law contains a function whose arguments are given by power-law monomials of physical parameters, which are characterized by power-law exponents. The basic idea is to enforce such particular forms structurally in a neural network in a parametrized way. We train the neural network model using the observed data, and when the training is successful, we can extract the power exponents that characterize scale-transformation symmetries of the physical problem. We demonstrate the effectiveness of our method with both synthetic and experimental data, validating its potential as a robust, model-independent tool for exploring self-similarity in complex systems. 
</p> </div> </dd> <dt> <a name='item649'>[649]</a> <a href ="/abs/2406.05153" title="Abstract" id="2406.05153"> arXiv:2406.05153 </a> (replaced) [<a href="/pdf/2406.05153" title="Download PDF" id="pdf-2406.05153" aria-labelledby="pdf-2406.05153">pdf</a>, <a href="https://arxiv.org/html/2406.05153v2" title="View HTML" id="html-2406.05153" aria-labelledby="html-2406.05153" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.05153" title="Other formats" id="oth-2406.05153" aria-labelledby="oth-2406.05153">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Integrating Physics of the Problem into Data-Driven Methods to Enhance Elastic Full-Waveform Inversion with Uncertainty Quantification </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Negahdari,+V">Vahid Negahdari</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Moghadasi,+S+R">Seyed Reza Moghadasi</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Razvan,+M+R">Mohammad Reza Razvan</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Geophysics (physics.geo-ph)</span>; Machine Learning (cs.LG); Numerical Analysis (math.NA) </div> <p class='mathjax'> Full-Waveform Inversion (FWI) is a nonlinear iterative seismic imaging technique that, by reducing the misfit between recorded and predicted seismic waveforms, can produce detailed estimates of subsurface geophysical properties. Nevertheless, the strong nonlinearity of FWI can trap the optimization in local minima. This issue arises due to factors such as improper initial values, the absence of low frequencies in the measurements, noise, and other related considerations. 
To address this challenge and with the advent of advanced machine-learning techniques, data-driven methods, such as deep learning, have attracted significantly increasing attention in the geophysical community. Furthermore, the elastic wave equation should be included in FWI to represent elastic effects accurately. The intersection of data-driven techniques and elastic scattering theories presents opportunities and challenges. In this paper, by using the knowledge of elastic scattering (physics of the problem) and integrating it with machine learning techniques, we propose methods for the solution of time-harmonic FWI to enhance accuracy compared to pure data-driven and physics-based approaches. Moreover, to address uncertainty quantification, by modifying the structure of the Variational Autoencoder, we introduce a probabilistic deep learning method based on the physics of the problem that enables us to explore the uncertainties of the solution. Owing to the limited availability of datasets in this field and to assess the performance and accuracy of the proposed methods, we create a comprehensive dataset close to reality and conduct a comparative analysis of the presented approaches on it. 
</p> </div> </dd> <dt> <a name='item650'>[650]</a> <a href ="/abs/2406.08521" title="Abstract" id="2406.08521"> arXiv:2406.08521 </a> (replaced) [<a href="/pdf/2406.08521" title="Download PDF" id="pdf-2406.08521" aria-labelledby="pdf-2406.08521">pdf</a>, <a href="https://arxiv.org/html/2406.08521v2" title="View HTML" id="html-2406.08521" aria-labelledby="html-2406.08521" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.08521" title="Other formats" id="oth-2406.08521" aria-labelledby="oth-2406.08521">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Embedding-based Multimodal Learning on Pan-Squamous Cell Carcinomas for Improved Survival Outcomes </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Waqas,+A">Asim Waqas</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Tripathi,+A">Aakash Tripathi</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Stewart,+P">Paul Stewart</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Naeini,+M">Mia Naeini</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Schabath,+M+B">Matthew B. Schabath</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Rasool,+G">Ghulam Rasool</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cell Behavior (q-bio.CB)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Cancer clinics capture disease data at various scales, from genetic to organ level. Current bioinformatic methods struggle to handle the heterogeneous nature of this data, especially with missing modalities. We propose PARADIGM, a Graph Neural Network (GNN) framework that learns from multimodal, heterogeneous datasets to improve clinical outcome prediction. 
PARADIGM generates embeddings from multi-resolution data using foundation models, aggregates them into patient-level representations, fuses them into a unified graph, and enhances performance for tasks like survival analysis. We train GNNs on pan-Squamous Cell Carcinomas and validate our approach on Moffitt Cancer Center lung SCC data. Multimodal GNN outperforms other models in patient survival prediction. Converging individual data modalities across varying scales provides a more insightful disease view. Our solution aims to understand the patient's circumstances comprehensively, offering insights on heterogeneous data integration and the benefits of converging maximum data views. </p> </div> </dd> <dt> <a name='item651'>[651]</a> <a href ="/abs/2406.15656" title="Abstract" id="2406.15656"> arXiv:2406.15656 </a> (replaced) [<a href="/pdf/2406.15656" title="Download PDF" id="pdf-2406.15656" aria-labelledby="pdf-2406.15656">pdf</a>, <a href="https://arxiv.org/html/2406.15656v2" title="View HTML" id="html-2406.15656" aria-labelledby="html-2406.15656" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.15656" title="Other formats" id="oth-2406.15656" aria-labelledby="oth-2406.15656">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Self-Supervised Adversarial Diffusion Models for Fast MRI Reconstruction </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Safari,+M">Mojtaba Safari</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Eidex,+Z">Zach Eidex</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Pan,+S">Shaoyan Pan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Qiu,+R+L">Richard L.J. 
Qiu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yang,+X">Xiaofeng Yang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Purpose: To propose a self-supervised deep learning-based compressed sensing MRI (DL-based CS-MRI) method named "Adaptive Self-Supervised Consistency Guided Diffusion Model (ASSCGD)" to accelerate data acquisition without requiring fully sampled datasets. Materials and Methods: We used the fastMRI multi-coil brain axial T2-weighted (T2-w) dataset from 1,376 cases and single-coil brain quantitative magnetization prepared 2 rapid acquisition gradient echoes (MP2RAGE) T1 maps from 318 cases to train and test our model. Robustness against domain shift was evaluated using two out-of-distribution (OOD) datasets: multi-coil brain axial postcontrast T1-weighted (T1c) dataset from 50 cases and axial T1-weighted (T1-w) dataset from 50 patients. Data were retrospectively subsampled at acceleration rates R in {2x, 4x, 8x}. ASSCGD partitions a random sampling pattern into two disjoint sets, ensuring data consistency during training. We compared our method with ReconFormer Transformer and SS-MRI, assessing performance using normalized mean squared error (NMSE), peak signal-to-noise ratio (PSNR), and structural similarity index (SSIM). Statistical tests included one-way analysis of variance (ANOVA) and multi-comparison Tukey's Honestly Significant Difference (HSD) tests. Results: ASSCGD preserved fine structures and brain abnormalities visually better than comparative methods at R = 8x for both multi-coil and single-coil datasets. It achieved the lowest NMSE at R in {4x, 8x}, and the highest PSNR and SSIM values at all acceleration rates for the multi-coil dataset. 
Similar trends were observed for the single-coil dataset, though SSIM values were comparable to ReconFormer at R in {2x, 8x}. These results were further confirmed by the voxel-wise correlation scatter plots. OOD results showed significant (p &lt;&lt; 10^-5) improvements in undersampled image quality after reconstruction. </p> </div> </dd> <dt> <a name='item652'>[652]</a> <a href ="/abs/2407.10689" title="Abstract" id="2407.10689"> arXiv:2407.10689 </a> (replaced) [<a href="/pdf/2407.10689" title="Download PDF" id="pdf-2407.10689" aria-labelledby="pdf-2407.10689">pdf</a>, <a href="/format/2407.10689" title="Other formats" id="oth-2407.10689" aria-labelledby="oth-2407.10689">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Classification of Heart Sounds Using Multi-Branch Deep Convolutional Network and LSTM-CNN </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Latifi,+S+A">Seyed Amir Latifi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Ghassemian,+H">Hassan Ghassemian</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Imani,+M">Maryam Imani</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 22 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Artificial Intelligence (cs.AI); Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> This paper presents a fast and cost-effective method for diagnosing cardiac abnormalities with high accuracy and reliability using low-cost systems in clinics. The primary limitation of automatic diagnosing of cardiac diseases is the rarity of correct and acceptable labeled samples, which can be expensive to prepare. To address this issue, two methods are proposed in this work. 
The first method is a unique Multi-Branch Deep Convolutional Neural Network (MBDCN) architecture inspired by human auditory processing, specifically designed to optimize feature extraction by employing various sizes of convolutional filters and audio signal power spectrum as input. In the second method, called the Long Short-Term Memory-Convolutional Neural (LSCN) model, the network architecture additionally includes Long Short-Term Memory (LSTM) network blocks to improve feature extraction in the time domain. The innovative approach of combining multiple parallel branches consisting of the one-dimensional convolutional layers along with LSTM blocks helps in achieving superior results in audio signal processing tasks. The experimental results demonstrate superiority of the proposed methods over the state-of-the-art techniques. The overall classification accuracy of heart sounds with the LSCN network is more than 96%. The efficiency of this network is significant compared to common feature extraction methods such as Mel Frequency Cepstral Coefficients (MFCC) and wavelet transform. Therefore, the proposed method shows promising results in the automatic analysis of heart sounds and has potential applications in the diagnosis and early detection of cardiovascular diseases. 
</p> </div> </dd> <dt> <a name='item653'>[653]</a> <a href ="/abs/2407.10921" title="Abstract" id="2407.10921"> arXiv:2407.10921 </a> (replaced) [<a href="/pdf/2407.10921" title="Download PDF" id="pdf-2407.10921" aria-labelledby="pdf-2407.10921">pdf</a>, <a href="https://arxiv.org/html/2407.10921v4" title="View HTML" id="html-2407.10921" aria-labelledby="html-2407.10921" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.10921" title="Other formats" id="oth-2407.10921" aria-labelledby="oth-2407.10921">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Leveraging Bi-Focal Perspectives and Granular Feature Integration for Accurate Reliable Early Alzheimer's Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=V,+P">Pandiyaraju V</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Venkatraman,+S">Shravan Venkatraman</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=A,+A">Abeshek A</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=S,+P+K">Pavan Kumar S</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=A,+A+S">Aravintakshan S A</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=A,+K">Kannan A</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 14 pages, 12 figures, 6 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Alzheimer's disease (AD) is the most common neurodegeneration, annually diagnosed in millions of patients. The present medicine scenario still finds challenges in the exact diagnosis and classification of AD through neuroimaging data. 
Traditional CNNs can extract a good amount of low-level information in an image but fail to extract high-level minuscule particles, which is a significant challenge in detecting AD from MRI scans. To overcome this, we propose a novel Granular Feature Integration method to combine information extraction at different scales combined with an efficient information flow, enabling the model to capture both broad and fine-grained features simultaneously. We also propose a Bi-Focal Perspective mechanism to highlight the subtle neurofibrillary tangles and amyloid plaques in the MRI scans, ensuring that critical pathological markers are accurately identified. Our model achieved an F1-Score of 99.31%, precision of 99.24%, and recall of 99.51%. These scores prove that our model is significantly better than the state-of-the-art (SOTA) CNNs in existence. </p> </div> </dd> <dt> <a name='item654'>[654]</a> <a href ="/abs/2408.05384" title="Abstract" id="2408.05384"> arXiv:2408.05384 </a> (replaced) [<a href="/pdf/2408.05384" title="Download PDF" id="pdf-2408.05384" aria-labelledby="pdf-2408.05384">pdf</a>, <a href="https://arxiv.org/html/2408.05384v2" title="View HTML" id="html-2408.05384" aria-labelledby="html-2408.05384" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.05384" title="Other formats" id="oth-2408.05384" aria-labelledby="oth-2408.05384">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Nonlinear Propagation of Non-Gaussian Uncertainties </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Acciarini,+G">Giacomo Acciarini</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Baresi,+N">Nicola Baresi</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Lloyd,+D">David Lloyd</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Izzo,+D">Dario Izzo</a></div> <div 
class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Space Physics (physics.space-ph)</span>; Symbolic Computation (cs.SC); Probability (math.PR); Chaotic Dynamics (nlin.CD) </div> <p class='mathjax'> This paper presents a novel approach for propagating uncertainties in dynamical systems building on high-order Taylor expansions of the flow and moment-generating functions (MGFs). Unlike prior methods that focus on Gaussian distributions, our approach leverages the relationship between MGFs and distribution moments to extend high-order uncertainty propagation techniques to non-Gaussian scenarios. This significantly broadens the applicability of these methods to a wider range of problems and uncertainty types. High-order moment computations are performed one-off and symbolically, reducing the computational burden of the technique to the calculation of Taylor series coefficients around a nominal trajectory, achieved by efficiently integrating the system's variational equations. Furthermore, the use of the proposed approach in combination with event transition tensors allows for accurate propagation of uncertainties at specific events, such as the landing surface of a celestial body, the crossing of a predefined Poincaré section, or the trigger of an arbitrary event during the propagation. Via numerical simulations we demonstrate the effectiveness of our method in various astrodynamics applications, including the unperturbed and perturbed two-body problem, and the circular restricted three-body problem, showing that it accurately propagates non-Gaussian uncertainties both at future times and at event manifolds. 
</p> </div> </dd> <dt> <a name='item655'>[655]</a> <a href ="/abs/2408.10001" title="Abstract" id="2408.10001"> arXiv:2408.10001 </a> (replaced) [<a href="/pdf/2408.10001" title="Download PDF" id="pdf-2408.10001" aria-labelledby="pdf-2408.10001">pdf</a>, <a href="https://arxiv.org/html/2408.10001v3" title="View HTML" id="html-2408.10001" aria-labelledby="html-2408.10001" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.10001" title="Other formats" id="oth-2408.10001" aria-labelledby="oth-2408.10001">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Coprime Bivariate Bicycle Codes </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Wang,+M">Ming Wang</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Mueller,+F">Frank Mueller</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Information Theory (cs.IT) </div> <p class='mathjax'> This work (1) proposes a novel numerical algorithm to accelerate the search process for good Bivariate Bicycle (BB) codes and (2) defines a new subclass of BB codes suitable for quantum error correction. The proposed acceleration search algorithm reduces the search space by excluding some equivalent codes from the search space, as well as setting thresholds to drop bad codes at an early stage. A number of new BB codes found by this algorithm are reported. The proposed subclass of BB codes employs coprimes to construct groups via polynomials as the basis for the BB code, rather than using the standard BB codes with unconstrained constructors. In contrast to vanilla BB codes, where parameters remain unknown prior to code discovery, the rate of the proposed code can be determined beforehand by specifying a factor polynomial as an input to the numerical search algorithm. 
Using this coprime BB construction, we found a number of surprisingly short to medium-length codes that were previously unknown. </p> </div> </dd> <dt> <a name='item656'>[656]</a> <a href ="/abs/2408.12752" title="Abstract" id="2408.12752"> arXiv:2408.12752 </a> (replaced) [<a href="/pdf/2408.12752" title="Download PDF" id="pdf-2408.12752" aria-labelledby="pdf-2408.12752">pdf</a>, <a href="https://arxiv.org/html/2408.12752v2" title="View HTML" id="html-2408.12752" aria-labelledby="html-2408.12752" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.12752" title="Other formats" id="oth-2408.12752" aria-labelledby="oth-2408.12752">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> High-distance codes with transversal Clifford and T-gates </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Jain,+S+P">Shubham P. Jain</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Albert,+V+V">Victor V. Albert</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 2 tables, 3 figures. Updated version: Includes a family of triorthogonal codes with improved parameters. Includes a more in-depth discussion of T-gate code families </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Information Theory (cs.IT); Number Theory (math.NT) </div> <p class='mathjax'> The non-local interactions in several quantum devices allow for the realization of more compact quantum encodings while retaining the same degree of protection against noise. Anticipating that short to medium-length codes will soon be realizable, it is important to construct stabilizer codes that, for a given code distance, admit fault-tolerant implementations of logical gates with the fewest number of physical qubits. 
We extract high-distance doubly even codes from the quantum quadratic-residue code family that admit a transversal implementation of the single-qubit Clifford group and block transversal implementation of the full Clifford group. Applying a doubling procedure [<a href="https://arxiv.org/abs/1509.03239" data-arxiv-id="1509.03239" class="link-https">arXiv:1509.03239</a>] to such codes yields a family of high-distance weak triply even codes which admit a transversal implementation of the logical $\texttt{T}$-gate. Relaxing the triply even property, we also obtain a family of triorthogonal codes which requires an even lower overhead at the cost of additional Clifford gates to achieve the same logical operation. To our knowledge, our doubly even and triorthogonal families are the shortest qubit stabilizer codes of the same distance that can realize their respective gates. </p> </div> </dd> <dt> <a name='item657'>[657]</a> <a href ="/abs/2409.07462" title="Abstract" id="2409.07462"> arXiv:2409.07462 </a> (replaced) [<a href="/pdf/2409.07462" title="Download PDF" id="pdf-2409.07462" aria-labelledby="pdf-2409.07462">pdf</a>, <a href="https://arxiv.org/html/2409.07462v2" title="View HTML" id="html-2409.07462" aria-labelledby="html-2409.07462" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.07462" title="Other formats" id="oth-2409.07462" aria-labelledby="oth-2409.07462">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> S-MolSearch: 3D Semi-supervised Contrastive Learning for Bioactive Molecule Search </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zhou,+G">Gengmo Zhou</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Wang,+Z">Zhen Wang</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Yu,+F">Feng Yu</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Ke,+G">Guolin Ke</a>, <a 
href="https://arxiv.org/search/q-bio?searchtype=author&query=Wei,+Z">Zhewei Wei</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Gao,+Z">Zhifeng Gao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Biomolecules (q-bio.BM)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Virtual Screening is an essential technique in the early phases of drug discovery, aimed at identifying promising drug candidates from vast molecular libraries. Recently, ligand-based virtual screening has garnered significant attention due to its efficacy in conducting extensive database screenings without relying on specific protein-binding site information. Obtaining binding affinity data for complexes is highly expensive, resulting in a limited amount of available data that covers a relatively small chemical space. Moreover, these datasets contain a significant amount of inconsistent noise. It is challenging to identify an inductive bias that consistently maintains the integrity of molecular activity during data augmentation. To tackle these challenges, we propose S-MolSearch, the first framework to our knowledge, that leverages molecular 3D information and affinity information in semi-supervised contrastive learning for ligand-based virtual screening. Drawing on the principles of inverse optimal transport, S-MolSearch efficiently processes both labeled and unlabeled data, training molecular structural encoders while generating soft labels for the unlabeled data. This design allows S-MolSearch to adaptively utilize unlabeled data within the learning process. Empirically, S-MolSearch demonstrates superior performance on widely-used benchmarks LIT-PCBA and DUD-E. It surpasses both structure-based and ligand-based virtual screening methods for AUROC, BEDROC and EF. 
</p> </div> </dd> <dt> <a name='item658'>[658]</a> <a href ="/abs/2409.10664" title="Abstract" id="2409.10664"> arXiv:2409.10664 </a> (replaced) [<a href="/pdf/2409.10664" title="Download PDF" id="pdf-2409.10664" aria-labelledby="pdf-2409.10664">pdf</a>, <a href="https://arxiv.org/html/2409.10664v2" title="View HTML" id="html-2409.10664" aria-labelledby="html-2409.10664" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.10664" title="Other formats" id="oth-2409.10664" aria-labelledby="oth-2409.10664">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Proximal Gradient Dynamics: Monotonicity, Exponential Convergence, and Applications </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Gokhale,+A">Anand Gokhale</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Davydov,+A">Alexander Davydov</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Bullo,+F">Francesco Bullo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to IEEE L-CSS and ACC, 7 pages, 1 figure </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optimization and Control (math.OC)</span>; Signal Processing (eess.SP); Systems and Control (eess.SY) </div> <p class='mathjax'> In this letter we study the proximal gradient dynamics. This recently-proposed continuous-time dynamics solves optimization problems whose cost functions are separable into a nonsmooth convex and a smooth component. First, we show that the cost function decreases monotonically along the trajectories of the proximal gradient dynamics. We then introduce a new condition that guarantees exponential convergence of the cost function to its optimal value, and show that this condition implies the proximal Polyak-Łojasiewicz condition. 
We also show that the proximal Polyak-Łojasiewicz condition guarantees exponential convergence of the cost function. Moreover, we extend these results to time-varying optimization problems, providing bounds for equilibrium tracking. Finally, we discuss applications of these findings, including the LASSO problem, certain matrix based problems and a numerical experiment on a feed-forward neural network. </p> </div> </dd> <dt> <a name='item659'>[659]</a> <a href ="/abs/2410.12976" title="Abstract" id="2410.12976"> arXiv:2410.12976 </a> (replaced) [<a href="/pdf/2410.12976" title="Download PDF" id="pdf-2410.12976" aria-labelledby="pdf-2410.12976">pdf</a>, <a href="https://arxiv.org/html/2410.12976v2" title="View HTML" id="html-2410.12976" aria-labelledby="html-2410.12976" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.12976" title="Other formats" id="oth-2410.12976" aria-labelledby="oth-2410.12976">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Kapitza-Inspired Stabilization of Non-Foster Circuits via Time Modulations </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Alex-Amor,+A">Antonio Alex-Amor</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Ptitcyn,+G">Grigorii Ptitcyn</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Engheta,+N">Nader Engheta</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages (7 pages main text, 3 pages supplementary materials), 4 figures; a minor issue in Fig. 
3(a) is corrected </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Applied Physics (physics.app-ph)</span>; Systems and Control (eess.SY) </div> <p class='mathjax'> With his formal analysis in 1951, the physicist Pyotr Kapitza demonstrated that an inverted pendulum with an externally vibrating base can be stable in its upper position, thus overcoming the force of gravity. Kapitza's work is an example that an originally unstable system can become stable after a minor perturbation of its properties or initial conditions is applied. Inspired by his ideas, we show how non-Foster circuits can be stabilized with the application of external \textit{electrical vibration}, i.e., time modulations. Non-Foster circuits are highly appreciated in the engineering community since their bandwidth characteristics are not limited by passive-circuits bounds. Unfortunately, non-Foster circuits are usually unstable and they must be stabilized prior to operation. Here, we focus on the study of non-Foster $L(t)C$ circuits with time-varying inductors and time-invariant negative capacitors. We find an intrinsic connection between Kapitza's inverted pendulum and non-Foster $L(t)C$ resonators. Moreover, we show how positive time-varying modulations of $L(t)>0$ can overcome and stabilize non-Foster negative capacitances $C&lt;0$. These findings open up an alternative manner of stabilizing electric circuits with the use of time modulations, and lay the groundwork for application of, what we coin \textit{Vibrational Electromagnetics}, in more complex media. 
</p> </div> </dd> <dt> <a name='item660'>[660]</a> <a href ="/abs/2410.18254" title="Abstract" id="2410.18254"> arXiv:2410.18254 </a> (replaced) [<a href="/pdf/2410.18254" title="Download PDF" id="pdf-2410.18254" aria-labelledby="pdf-2410.18254">pdf</a>, <a href="https://arxiv.org/html/2410.18254v2" title="View HTML" id="html-2410.18254" aria-labelledby="html-2410.18254" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.18254" title="Other formats" id="oth-2410.18254" aria-labelledby="oth-2410.18254">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Refining Ky Fan's majorization relation with linear programming </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Alhejji,+M+A">Mohammad A. Alhejji</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 36 pages, 2 figures, error in version 1 corrected </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Information Theory (cs.IT); Rings and Algebras (math.RA) </div> <p class='mathjax'> A separable version of Ky Fan's majorization relation is proven for a sum of two operators that are each a tensor product of two positive semi-definite operators. In order to prove it, upper bounds are established for the relevant largest eigenvalue sums in terms of the optimal values of certain linear programs. The objective function of these linear programs is the dual of the direct sum of the spectra of the summands. The feasible sets are bounded polyhedra determined by positive numbers, called alignment terms, that quantify the overlaps between pairs of largest eigenvalue spaces of the summands. By appealing to geometric considerations, tight upper bounds are established on the alignment terms of tensor products of positive semi-definite operators. 
As an application, the spin alignment conjecture in quantum information theory is affirmatively resolved to the 2-letter level. Consequently, the coherent information of platypus channels is additive to the 2-letter level. </p> </div> </dd> <dt> <a name='item661'>[661]</a> <a href ="/abs/2411.01589" title="Abstract" id="2411.01589"> arXiv:2411.01589 </a> (replaced) [<a href="/pdf/2411.01589" title="Download PDF" id="pdf-2411.01589" aria-labelledby="pdf-2411.01589">pdf</a>, <a href="https://arxiv.org/html/2411.01589v2" title="View HTML" id="html-2411.01589" aria-labelledby="html-2411.01589" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.01589" title="Other formats" id="oth-2411.01589" aria-labelledby="oth-2411.01589">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> BiT-MamSleep: Bidirectional Temporal Mamba for EEG Sleep Staging </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Zhou,+X">Xinliang Zhou</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Han,+Y">Yuzhe Han</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+Z">Zhisheng Chen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liu,+C">Chenyu Liu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Ding,+Y">Yi Ding</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Jia,+Z">Ziyu Jia</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liu,+Y">Yang Liu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> In this paper, we address the challenges in automatic sleep stage classification, particularly the high computational cost, inadequate modeling of bidirectional temporal dependencies, and class imbalance issues faced by 
Transformer-based models. To address these limitations, we propose BiT-MamSleep, a novel architecture that integrates the Triple-Resolution CNN (TRCNN) for efficient multi-scale feature extraction with the Bidirectional Mamba (BiMamba) mechanism, which models both short- and long-term temporal dependencies through bidirectional processing of EEG data. Additionally, BiT-MamSleep incorporates an Adaptive Feature Recalibration (AFR) module and a temporal enhancement block to dynamically refine feature importance, optimizing classification accuracy without increasing computational complexity. To further improve robustness, we apply optimization techniques such as Focal Loss and SMOTE to mitigate class imbalance. Extensive experiments on four public datasets demonstrate that BiT-MamSleep significantly outperforms state-of-the-art methods, particularly in handling long EEG sequences and addressing class imbalance, leading to more accurate and scalable sleep stage classification. </p> </div> </dd> <dt> <a name='item662'>[662]</a> <a href ="/abs/2411.06043" title="Abstract" id="2411.06043"> arXiv:2411.06043 </a> (replaced) [<a href="/pdf/2411.06043" title="Download PDF" id="pdf-2411.06043" aria-labelledby="pdf-2411.06043">pdf</a>, <a href="https://arxiv.org/html/2411.06043v2" title="View HTML" id="html-2411.06043" aria-labelledby="html-2411.06043" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.06043" title="Other formats" id="oth-2411.06043" aria-labelledby="oth-2411.06043">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The subTuring degrees </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Kihara,+T">Takayuki Kihara</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Ng,+K+M">Keng Meng Ng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Logic (math.LO)</span>; 
Logic in Computer Science (cs.LO) </div> <p class='mathjax'> In this article, we introduce a notion of reducibility for partial functions on the natural numbers, which we call subTuring reducibility. One important aspect is that the subTuring degrees correspond to the structure of the realizability subtoposes of the effective topos. We show that the subTuring degrees (that is, the realizability subtoposes of the effective topos) form a dense non-modular (thus, non-distributive) lattice. We also show that there is a nonzero join-irreducible subTuring degree (which implies that there is a realizability subtopos of the effective topos that cannot be decomposed into two smaller realizability subtoposes). </p> </div> </dd> <dt> <a name='item663'>[663]</a> <a href ="/abs/2411.06650" title="Abstract" id="2411.06650"> arXiv:2411.06650 </a> (replaced) [<a href="/pdf/2411.06650" title="Download PDF" id="pdf-2411.06650" aria-labelledby="pdf-2411.06650">pdf</a>, <a href="/format/2411.06650" title="Other formats" id="oth-2411.06650" aria-labelledby="oth-2411.06650">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Quantum Policy Gradient in Reproducing Kernel Hilbert Space </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Bossens,+D+M">David M. Bossens</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Bharti,+K">Kishor Bharti</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Thompson,+J">Jayne Thompson</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Parametrised quantum circuits offer expressive and data-efficient representations for machine learning. 
Due to quantum states residing in a high-dimensional Hilbert space, parametrised quantum circuits have a natural interpretation in terms of kernel methods. The representation of quantum circuits in terms of quantum kernels has been studied widely in quantum supervised learning, but has been overlooked in the context of quantum reinforcement learning. This paper proposes parametric and non-parametric policy gradient and actor-critic algorithms with quantum kernel policies in quantum environments. This approach, implemented with both numerical and analytical quantum policy gradient techniques, allows exploiting the many advantages of kernel methods, including available analytic forms for the gradient of the policy and tunable expressiveness. The proposed approach is suitable for vector-valued action spaces and each of the formulations demonstrates a quadratic reduction in query complexity compared to their classical counterparts. Two actor-critic algorithms, one based on stochastic policy gradient and one based on deterministic policy gradient (comparable to the popular DDPG algorithm), demonstrate additional query complexity reductions compared to quantum policy gradient algorithms under favourable conditions. 
</p> </div> </dd> <dt> <a name='item664'>[664]</a> <a href ="/abs/2411.06675" title="Abstract" id="2411.06675"> arXiv:2411.06675 </a> (replaced) [<a href="/pdf/2411.06675" title="Download PDF" id="pdf-2411.06675" aria-labelledby="pdf-2411.06675">pdf</a>, <a href="https://arxiv.org/html/2411.06675v2" title="View HTML" id="html-2411.06675" aria-labelledby="html-2411.06675" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.06675" title="Other formats" id="oth-2411.06675" aria-labelledby="oth-2411.06675">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FCA using the Concept Explorer in 2024 </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Vargas-Garc%C3%8Da,+E">Edith Vargas-GarcÍa</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Wachtel,+A">Andreas Wachtel</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages, 1 context, 9 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Logic (math.LO)</span>; Logic in Computer Science (cs.LO) </div> <p class='mathjax'> In this note we give a very short introduction to Formal Concept Analysis, accompanied by an example in order to build concept lattices from a context. We build the lattice using the Java-based software Concept Explorer (ConExp) in a recent version of Linux. Installing an appropriate Java version is necessary, because ConExp was developed some time ago using a Sun Java version, which is not open-source. As a result, it has been observed that ConExp will not build a lattice when started with an open-source Java version. Therefore, we also sketch the procedure we followed to install an appropriate Java version which makes ConExp work again, i.e., to "build lattices again". 
We also show how to start ConExp with a 32 bit Java version, which requires a few additional libraries. </p> </div> </dd> <dt> <a name='item665'>[665]</a> <a href ="/abs/2411.07249" title="Abstract" id="2411.07249"> arXiv:2411.07249 </a> (replaced) [<a href="/pdf/2411.07249" title="Download PDF" id="pdf-2411.07249" aria-labelledby="pdf-2411.07249">pdf</a>, <a href="https://arxiv.org/html/2411.07249v3" title="View HTML" id="html-2411.07249" aria-labelledby="html-2411.07249" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.07249" title="Other formats" id="oth-2411.07249" aria-labelledby="oth-2411.07249">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SPDIM: Source-Free Unsupervised Conditional and Label Shift Adaptation in EEG </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+S">Shanglin Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Kawanabe,+M">Motoaki Kawanabe</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Kobler,+R+J">Reinmar J. Kobler</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> The non-stationary nature of electroencephalography (EEG) introduces distribution shifts across domains (e.g., days and subjects), posing a significant challenge to EEG-based neurotechnology generalization. Without labeled calibration data for target domains, the problem is a source-free unsupervised domain adaptation (SFUDA) problem. For scenarios with constant label distribution, Riemannian geometry-aware statistical alignment frameworks on the symmetric positive definite (SPD) manifold are considered state-of-the-art. However, many practical scenarios, including EEG-based sleep staging, exhibit label shifts. 
Here, we propose a geometric deep learning framework for SFUDA problems under specific distribution shifts, including label shifts. We introduce a novel, realistic generative model and show that prior Riemannian statistical alignment methods on the SPD manifold can compensate for specific marginal and conditional distribution shifts but hurt generalization under label shifts. As a remedy, we propose a parameter-efficient manifold optimization strategy termed SPDIM. SPDIM uses the information maximization principle to learn a single SPD-manifold-constrained parameter per target domain. In simulations, we demonstrate that SPDIM can compensate for the shifts under our generative model. Moreover, using public EEG-based brain-computer interface and sleep staging datasets, we show that SPDIM outperforms prior approaches. </p> </div> </dd> <dt> <a name='item666'>[666]</a> <a href ="/abs/2411.10798" title="Abstract" id="2411.10798"> arXiv:2411.10798 </a> (replaced) [<a href="/pdf/2411.10798" title="Download PDF" id="pdf-2411.10798" aria-labelledby="pdf-2411.10798">pdf</a>, <a href="/format/2411.10798" title="Other formats" id="oth-2411.10798" aria-labelledby="oth-2411.10798">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unveiling Hidden Details: A RAW Data-Enhanced Paradigm for Real-World Super-Resolution </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Peng,+L">Long Peng</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+W">Wenbo Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Guo,+J">Jiaming Guo</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Di,+X">Xin Di</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Sun,+H">Haoze Sun</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+Y">Yong Li</a>, <a 
href="https://arxiv.org/search/eess?searchtype=author&query=Pei,+R">Renjing Pei</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+Y">Yang Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Cao,+Y">Yang Cao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zha,+Z">Zheng-Jun Zha</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> We sincerely apologize, but due to some commercial confidentiality agreements related to the report, we have decided to withdraw the submission for now and will resubmit after making the necessary revisions </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Real-world image super-resolution (Real SR) aims to generate high-fidelity, detail-rich high-resolution (HR) images from low-resolution (LR) counterparts. Existing Real SR methods primarily focus on generating details from the LR RGB domain, often leading to a lack of richness or fidelity in fine details. In this paper, we pioneer the use of details hidden in RAW data to complement existing RGB-only methods, yielding superior outputs. We argue that key image processing steps in Image Signal Processing, such as denoising and demosaicing, inherently result in the loss of fine details in LR images, making LR RAW a valuable information source. To validate this, we present RealSR-RAW, a comprehensive dataset comprising over 10,000 pairs with LR and HR RGB images, along with corresponding LR RAW, captured across multiple smartphones under varying focal lengths and diverse scenes. Additionally, we propose a novel, general RAW adapter to efficiently integrate LR RAW data into existing CNNs, Transformers, and Diffusion-based Real SR models by suppressing the noise contained in LR RAW and aligning its distribution. 
Extensive experiments demonstrate that incorporating RAW data significantly enhances detail recovery and improves Real SR performance across ten evaluation metrics, including both fidelity and perception-oriented metrics. Our findings open a new direction for the Real SR task, and the dataset and code will be made available to support future research. </p> </div> </dd> <dt> <a name='item667'>[667]</a> <a href ="/abs/2411.12036" title="Abstract" id="2411.12036"> arXiv:2411.12036 </a> (replaced) [<a href="/pdf/2411.12036" title="Download PDF" id="pdf-2411.12036" aria-labelledby="pdf-2411.12036">pdf</a>, <a href="https://arxiv.org/html/2411.12036v2" title="View HTML" id="html-2411.12036" aria-labelledby="html-2411.12036" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.12036" title="Other formats" id="oth-2411.12036" aria-labelledby="oth-2411.12036">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Prediction-Guided Active Experiments </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Ao,+R">Ruicheng Ao</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Chen,+H">Hongyu Chen</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Simchi-Levi,+D">David Simchi-Levi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 25 pages, 11 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG); Econometrics (econ.EM) </div> <p class='mathjax'> In this work, we introduce a new framework for active experimentation, the Prediction-Guided Active Experiment (PGAE), which leverages predictions from an existing machine learning model to guide sampling and experimentation. 
Specifically, at each time step, an experimental unit is sampled according to a designated sampling distribution, and the actual outcome is observed based on an experimental probability. Otherwise, only a prediction for the outcome is available. We begin by analyzing the non-adaptive case, where full information on the joint distribution of the predictor and the actual outcome is assumed. For this scenario, we derive an optimal experimentation strategy by minimizing the semi-parametric efficiency bound for the class of regular estimators. We then introduce an estimator that meets this efficiency bound, achieving asymptotic optimality. Next, we move to the adaptive case, where the predictor is continuously updated with newly sampled data. We show that the adaptive version of the estimator remains efficient and attains the same semi-parametric bound under certain regularity assumptions. Finally, we validate PGAE's performance through simulations and a semi-synthetic experiment using data from the US Census Bureau. The results underscore the PGAE framework's effectiveness and superiority compared to other existing methods. 
</p> </div> </dd> <dt> <a name='item668'>[668]</a> <a href ="/abs/2411.12582" title="Abstract" id="2411.12582"> arXiv:2411.12582 </a> (replaced) [<a href="/pdf/2411.12582" title="Download PDF" id="pdf-2411.12582" aria-labelledby="pdf-2411.12582">pdf</a>, <a href="https://arxiv.org/html/2411.12582v2" title="View HTML" id="html-2411.12582" aria-labelledby="html-2411.12582" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.12582" title="Other formats" id="oth-2411.12582" aria-labelledby="oth-2411.12582">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Reconfiguration Using Generalized Token Jumping </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=K%C5%99i%C5%A1%C5%A5an,+J+M">Jan Matyáš Křišťan</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Svoboda,+J">Jakub Svoboda</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> To appear at WALCOM 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Combinatorics (math.CO)</span>; Data Structures and Algorithms (cs.DS) </div> <p class='mathjax'> In reconfiguration, we are given two solutions to a graph problem, such as Vertex Cover or Dominating Set, with each solution represented by a placement of tokens on vertices of the graph. Our task is to reconfigure one into the other using small steps while ensuring the intermediate configurations of tokens are also valid solutions. The two commonly studied settings are Token Jumping and Token Sliding, which allows moving a single token to an arbitrary or an adjacent vertex, respectively. <br>We introduce new rules that generalize Token Jumping, parameterized by the number of tokens allowed to move at once and by the maximum distance of each move. 
Our main contribution is identifying minimal rules that allow reconfiguring any possible given solution into any other for Independent Set, Vertex Cover, and Dominating Set. For each minimal rule, we also provide an efficient algorithm that finds a corresponding reconfiguration sequence. <br>We further focus on the rule that allows each token to move to an adjacent vertex in a single step. This natural variant turns out to be the minimal rule that guarantees reconfigurability for Vertex Cover. We determine the computational complexity of deciding whether a (shortest) reconfiguration sequence exists under this rule for the three studied problems. While reachability for Vertex Cover is shown to be in P, finding a shortest sequence is shown to be NP-complete. For Independent Set and Dominating Set, even reachability is shown to be PSPACE-complete. </p> </div> </dd> <dt> <a name='item669'>[669]</a> <a href ="/abs/2411.13280" title="Abstract" id="2411.13280"> arXiv:2411.13280 </a> (replaced) [<a href="/pdf/2411.13280" title="Download PDF" id="pdf-2411.13280" aria-labelledby="pdf-2411.13280">pdf</a>, <a href="https://arxiv.org/html/2411.13280v2" title="View HTML" id="html-2411.13280" aria-labelledby="html-2411.13280" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.13280" title="Other formats" id="oth-2411.13280" aria-labelledby="oth-2411.13280">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Structure-Based Molecule Optimization via Gradient-Guided Bayesian Update </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Qiu,+K">Keyue Qiu</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Song,+Y">Yuxuan Song</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Yu,+J">Jie Yu</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Ma,+H">Hongbo Ma</a>, <a 
href="https://arxiv.org/search/q-bio?searchtype=author&query=Cao,+Z">Ziyao Cao</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zhang,+Z">Zhilong Zhang</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Wu,+Y">Yushuai Wu</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zheng,+M">Mingyue Zheng</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zhou,+H">Hao Zhou</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Ma,+W">Wei-Ying Ma</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 27 pages, 17 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Biomolecules (q-bio.BM)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Structure-based molecule optimization (SBMO) aims to optimize molecules with both continuous coordinates and discrete types against protein targets. A promising direction is to exert gradient guidance on generative models given its remarkable success in images, but it is challenging to guide discrete data and risks inconsistencies between modalities. To this end, we leverage a continuous and differentiable space derived through Bayesian inference, presenting Molecule Joint Optimization (MolJO), the first gradient-based SBMO framework that facilitates joint guidance signals across different modalities while preserving SE(3)-equivariance. We introduce a novel backward correction strategy that optimizes within a sliding window of the past histories, allowing for a seamless trade-off between explore-and-exploit during optimization. Our proposed MolJO achieves state-of-the-art performance on CrossDocked2020 benchmark (Success Rate 51.3% , Vina Dock -9.05 and SA 0.78), more than 4x improvement in Success Rate compared to the gradient-based counterpart, and 2x "Me-Better" Ratio as much as 3D baselines. 
Furthermore, we extend MolJO to a wide range of optimization settings, including multi-objective optimization and challenging tasks in drug design such as R-group optimization and scaffold hopping, further underscoring its versatility and potential. </p> </div> </dd> </dl> <div class='paging'>Total of 669 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/cs/new?skip=0&amp;show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> </div> </div> </div> </main> <footer style="clear: both;"> <div class="columns is-desktop" role="navigation" aria-label="Secondary" style="margin: -0.75em -0.75em 0.75em -0.75em">  <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 
270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a 
class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>   </div> </footer> </div> <script src="/static/base/1.0.1/js/member_acknowledgement.js"></script> </body> </html>