Machine Learning
aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/ ></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div><!-- /end mobile-header --> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Machine Learning</h1> <ul> <li><a href="#item0">New submissions</a></li> <li><a href="#item82">Cross-lists</a></li> <li><a href="#item170">Replacements</a></li> </ul> <p>See <a id="recent-cs.LG" aria-labelledby="recent-cs.LG" href="/list/cs.LG/recent">recent</a> articles</p> <h3>Showing new listings for Wednesday, 19 March 2025</h3> <div class='paging'>Total of 291 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href=/list/cs.LG/new?skip=0&show=1000 rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> <dl id='articles'> <h3>New submissions (showing 81 of 81 entries)</h3> <dt> <a name='item1'>[1]</a> <a href ="/abs/2503.13500" title="Abstract" id="2503.13500"> arXiv:2503.13500 </a> [<a href="/pdf/2503.13500" title="Download PDF" id="pdf-2503.13500" aria-labelledby="pdf-2503.13500">pdf</a>, <a href="https://arxiv.org/html/2503.13500v1" title="View HTML" id="html-2503.13500" aria-labelledby="html-2503.13500" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13500" title="Other formats" id="oth-2503.13500" aria-labelledby="oth-2503.13500">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Long-horizon Visual Instruction Generation with Logic and Attribute Self-reflection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Suo,+Y">Yucheng Suo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+F">Fan Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shen,+K">Kaixin Shen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+L">Linchao Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Y">Yi Yang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> ICLR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Visual instructions for long-horizon tasks are crucial as they intuitively clarify complex concepts and enhance retention across extended steps. 
[2] arXiv:2503.13503 [pdf, html, other]
Title: SciHorizon: Benchmarking AI-for-Science Readiness from Scientific Data to Large Language Models
Authors: Chuan Qin, Xin Chen, Chengrui Wang, Pengmin Wu, Xi Chen, Yihang Cheng, Jingyi Zhao, Meng Xiao, Xiangchao Dong, Qingqing Long, Boya Pan, Han Wu, Chengzan Li, Yuanchun Zhou, Hui Xiong, Hengshu Zhu
Subjects: Machine Learning (cs.LG); Computation and Language (cs.CL); Digital Libraries (cs.DL); Information Retrieval (cs.IR)

In recent years, the rapid advancement of Artificial Intelligence (AI) technologies, particularly Large Language Models (LLMs), has revolutionized the paradigm of scientific discovery, establishing AI-for-Science (AI4Science) as a dynamic and evolving field. However, there is still a lack of an effective framework for the overall assessment of AI4Science, particularly from a holistic perspective on data quality and model capability. Therefore, in this study we propose SciHorizon, a comprehensive assessment framework designed to benchmark the readiness of AI4Science from both the scientific data and LLM perspectives. First, we introduce a generalizable framework for assessing AI-ready scientific data, encompassing four key dimensions (Quality, FAIRness, Explainability, and Compliance) that are subdivided into 15 sub-dimensions. Drawing on data resource papers published between 2018 and 2023 in peer-reviewed journals, we present recommendation lists of AI-ready datasets for both Earth and Life Sciences, making a novel and original contribution to the field. Concurrently, to assess the capabilities of LLMs across multiple scientific disciplines, we establish 16 assessment dimensions based on five core indicators (Knowledge, Understanding, Reasoning, Multimodality, and Values) spanning Mathematics, Physics, Chemistry, Life Sciences, and Earth and Space Sciences. Using the developed benchmark datasets, we have conducted a comprehensive evaluation of over 20 representative open-source and closed-source LLMs. All results are publicly available at http://www.scihorizon.cn/en.
[3] arXiv:2503.13504 [pdf, html, other]
Title: CoCMT: Communication-Efficient Cross-Modal Transformer for Collaborative Perception
Authors: Rujia Wang, Xiangbo Gao, Hao Xiang, Runsheng Xu, Zhengzhong Tu
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV); Robotics (cs.RO)

Multi-agent collaborative perception enhances each agent's perceptual capabilities by sharing sensing information to cooperatively perform robot perception tasks. This approach has proven effective in addressing challenges such as sensor deficiencies, occlusions, and long-range perception. However, existing representative collaborative perception systems transmit intermediate feature maps, such as bird's-eye view (BEV) representations, which contain a significant amount of non-critical information, leading to high communication bandwidth requirements. To enhance communication efficiency while preserving perception capability, we introduce CoCMT, an object-query-based collaboration framework that optimizes communication bandwidth by selectively extracting and transmitting essential features. Within CoCMT, we introduce the Efficient Query Transformer (EQFormer) to effectively fuse multi-agent object queries, and we implement synergistic deep supervision to enhance positive reinforcement between stages, leading to improved overall performance. Experiments on the OPV2V and V2V4Real datasets show that CoCMT outperforms state-of-the-art methods while drastically reducing communication needs. On V2V4Real, our model (with Top-50 object queries) requires only 0.416 Mb of bandwidth, 83 times less than SOTA methods, while improving AP70 by 1.1 percent. This efficiency breakthrough enables practical collaborative perception deployment in bandwidth-constrained environments without sacrificing detection accuracy.
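The bandwidth saving above comes from transmitting a small, fixed budget of object queries instead of dense BEV maps. A minimal sketch of such a selection step, assuming a per-query confidence score; names and sizes here are illustrative, not the authors' code:

```python
import torch

def select_top_k_queries(queries, confidences, k=50):
    """Keep only the k most confident object queries for transmission.

    queries:     (N, D) tensor of object-query embeddings
    confidences: (N,) tensor of per-query objectness scores
    """
    k = min(k, queries.shape[0])
    top = torch.topk(confidences, k).indices
    return queries[top], confidences[top]

# Hypothetical usage: each agent sends ~k*D floats rather than a dense BEV map.
q = torch.randn(900, 256)          # 900 candidate queries, 256-dim each
conf = torch.rand(900)
q_tx, conf_tx = select_top_k_queries(q, conf, k=50)
print(q_tx.shape)                  # torch.Size([50, 256])
```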
[4] arXiv:2503.13506 [pdf, html, other]
Title: The Role of Hyperparameters in Predictive Multiplicity
Authors: Mustafa Cavus, Katarzyna Woźnica, Przemysław Biecek
Comments: 16 pages, 4 figures
Subjects: Machine Learning (cs.LG); Machine Learning (stat.ML)

This paper investigates the critical role of hyperparameters in predictive multiplicity, where different machine learning models trained on the same dataset yield divergent predictions for identical inputs. These inconsistencies can seriously impact high-stakes decisions such as credit assessments, hiring, and medical diagnoses. Focusing on six widely used models for tabular data (Elastic Net, Decision Tree, k-Nearest Neighbor, Support Vector Machine, Random Forests, and Extreme Gradient Boosting), we explore how hyperparameter tuning influences predictive multiplicity, as expressed by the distribution of prediction discrepancies across benchmark datasets. Key hyperparameters such as lambda in Elastic Net, gamma in Support Vector Machines, and alpha in Extreme Gradient Boosting play a crucial role in shaping predictive multiplicity, often compromising the stability of predictions within specific algorithms. Our experiments on 21 benchmark datasets reveal that tuning these hyperparameters leads to notable performance improvements but also increases prediction discrepancies, with Extreme Gradient Boosting exhibiting the highest discrepancy and substantial prediction instability. This highlights the trade-off between performance optimization and prediction consistency, raising concerns about the risk of arbitrary predictions. These findings provide insight into how hyperparameter optimization leads to predictive multiplicity. While predictive multiplicity allows prioritizing domain-specific objectives such as fairness and reduces reliance on a single model, it also complicates decision-making, potentially leading to arbitrary or unjustified outcomes.
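Predictive multiplicity is commonly quantified as the fraction of inputs on which near-equally-accurate models disagree. A minimal sketch of that discrepancy measure; the models and protocol are illustrative, not the paper's exact setup:

```python
import numpy as np
from itertools import combinations
from sklearn.tree import DecisionTreeClassifier

def discrepancy(models, X):
    """Fraction of inputs on which at least one pair of models disagrees."""
    preds = np.stack([m.predict(X) for m in models])   # (n_models, n_samples)
    disagree = np.zeros(X.shape[0], dtype=bool)
    for i, j in combinations(range(len(models)), 2):
        disagree |= preds[i] != preds[j]
    return disagree.mean()

# Illustrative usage: same algorithm, different hyperparameter settings.
# models = [DecisionTreeClassifier(max_depth=d).fit(X_tr, y_tr) for d in (2, 4, 8)]
# print(discrepancy(models, X_test))
```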
[5] arXiv:2503.13509 [pdf, html, other]
Title: MentalChat16K: A Benchmark Dataset for Conversational Mental Health Assistance
Authors: Jia Xu, Tianyi Wei, Bojian Hou, Patryk Orzechowski, Shu Yang, Ruochen Jin, Rachael Paulbeck, Joost Wagenaar, George Demiris, Li Shen
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Computers and Society (cs.CY); Human-Computer Interaction (cs.HC)

We introduce MentalChat16K, an English benchmark dataset combining a synthetic mental health counseling dataset and a dataset of anonymized transcripts from interventions between Behavioral Health Coaches and Caregivers of patients in palliative or hospice care. Covering a diverse range of conditions such as depression, anxiety, and grief, this curated dataset is designed to facilitate the development and evaluation of large language models for conversational mental health assistance. By providing a high-quality resource tailored to this critical domain, MentalChat16K aims to advance research on empathetic, personalized AI solutions that improve access to mental health support services. The dataset prioritizes patient privacy, ethical considerations, and responsible data usage. MentalChat16K presents a valuable opportunity for the research community to innovate AI technologies that can positively impact mental well-being.
[6] arXiv:2503.13530 [pdf, other]
Title: Cognitive Activation and Chaotic Dynamics in Large Language Models: A Quasi-Lyapunov Analysis of Reasoning Mechanisms
Authors: Xiaojian Li, Yongkang Leng, Ruiqing Ding, Hangjie Mo, Shanlin Yang
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI)

The human-like reasoning capabilities exhibited by Large Language Models (LLMs) challenge traditional neural network theory's understanding of the flexibility of fixed-parameter systems. This paper proposes the "Cognitive Activation" theory, revealing the essence of LLMs' reasoning mechanisms from the perspective of dynamical systems: the model's reasoning ability stems from a chaotic process of dynamic information extraction in the parameter space. By introducing the Quasi-Lyapunov Exponent (QLE), we quantitatively analyze the chaotic characteristics of the model at different layers. Experiments show that the model's information accumulation follows a nonlinear exponential law, and that the Multilayer Perceptron (MLP) accounts for a higher proportion of the final output than the attention mechanism. Further experiments indicate that minor initial-value perturbations have a substantial impact on the model's reasoning ability, supporting the theoretical analysis that large language models are chaotic systems. This research provides a chaos-theory framework for the interpretability of LLMs' reasoning and reveals potential pathways for balancing creativity and reliability in model design.
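The abstract does not give the exact QLE formula; one standard way to measure layer-wise sensitivity is Benettin-style renormalized perturbation growth through the network. A toy sketch under that reading (the definition and the random tanh layers standing in for transformer blocks are assumptions):

```python
import numpy as np

def quasi_lyapunov_profile(layers, h0, eps=1e-4, seed=0):
    """Per-layer log growth rate of a small hidden-state perturbation."""
    rng = np.random.default_rng(seed)
    delta = rng.normal(size=h0.shape)
    delta *= eps / np.linalg.norm(delta)
    h, hp, rates = h0, h0 + delta, []
    for layer in layers:                    # each layer: vector -> vector
        h, hp = layer(h), layer(hp)
        sep = np.linalg.norm(hp - h)
        rates.append(np.log(sep / eps))     # positive => locally expanding
        hp = h + (hp - h) * (eps / sep)     # renormalize to measure next layer
    return rates

# Toy usage with random tanh layers as stand-ins for model blocks.
d = 64
Ws = [np.random.default_rng(i).normal(scale=1.5 / np.sqrt(d), size=(d, d)) for i in range(6)]
layers = [lambda x, W=W: np.tanh(W @ x) for W in Ws]
print(quasi_lyapunov_profile(layers, np.random.default_rng(42).normal(size=d)))
```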
[7] arXiv:2503.13534 [pdf, other]
Title: Multi-output Classification for Compound Fault Diagnosis in Motor under Partially Labeled Target Domain
Authors: Wonjun Yi, Yong-Hwa Park
Subjects: Machine Learning (cs.LG)

This study presents a novel multi-output classification (MOC) framework designed for domain adaptation in fault diagnosis, addressing the challenges posed by a partially labeled (PL) target-domain dataset and coexisting faults in rotating machinery. Unlike conventional multi-class classification (MCC) approaches, the MOC framework independently classifies the severity of each fault, enhancing diagnostic accuracy. By integrating a multi-kernel maximum mean discrepancy (MKMMD) loss and an entropy minimization (EM) loss, the proposed method improves feature transferability between source and target domains, while frequency layer normalization (FLN) effectively handles stationary vibration signals by leveraging mechanical characteristics. Experimental evaluations across six domain adaptation cases, encompassing PL scenarios, demonstrate the superior performance of the MOC approach over baseline methods in terms of macro F1 score.

[8] arXiv:2503.13537 [pdf, html, other]
Title: FedTilt: Towards Multi-Level Fairness-Preserving and Robust Federated Learning
Authors: Binghui Zhang, Luis Mares De La Cruz, Binghui Wang
Comments: 13 pages
Subjects: Machine Learning (cs.LG); Distributed, Parallel, and Cluster Computing (cs.DC)

Federated Learning (FL) is an emerging decentralized learning paradigm that can partly address privacy concerns that cannot be handled by traditional centralized and distributed learning. Further, to make FL practical, it is also necessary to consider constraints such as fairness and robustness. However, existing robust FL methods often produce unfair models, and existing fair FL methods consider only one level of fairness (client fairness) and are not robust to persistent outliers (i.e., outliers injected into each training round), which are common in real-world FL settings. We propose FedTilt, a novel FL method that can preserve multi-level fairness and remain robust to outliers. In particular, we consider two common levels of fairness: client fairness (uniformity of performance across clients) and client data fairness (uniformity of performance across different classes of data within a client). FedTilt is inspired by the recently proposed tilted empirical risk minimization, which introduces tilt hyperparameters that can be flexibly tuned. Theoretically, we show how tuning the tilt values can achieve two-level fairness and mitigate persistent outliers, and we derive the convergence condition of FedTilt. Empirically, our evaluation on a suite of realistic federated datasets in diverse settings shows the effectiveness and flexibility of the FedTilt framework and its superiority over state-of-the-art methods.
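Tilted empirical risk minimization replaces the average loss with a log-sum-exp aggregate, (1/t) log(mean(exp(t * loss))), whose tilt t interpolates between the mean (t -> 0), the max (t -> +inf), and the min (t -> -inf). A minimal sketch of a two-level tilted objective; how FedTilt exactly composes the two levels is my assumption from the abstract:

```python
import torch

def tilted_loss(losses, t):
    """Tilted aggregate (1/t) * log(mean(exp(t * losses))); the mean as t -> 0."""
    if abs(t) < 1e-8:
        return losses.mean()
    n = torch.log(torch.tensor(float(len(losses))))
    return (torch.logsumexp(t * losses, dim=0) - n) / t

def two_level_tilt(per_class_losses_per_client, t_client, t_class):
    """Tilt over classes within each client, then tilt over clients."""
    client_losses = torch.stack(
        [tilted_loss(l, t_class) for l in per_class_losses_per_client])
    return tilted_loss(client_losses, t_client)
```

Positive tilts up-weight the worst-off classes and clients (promoting fairness); a negative client-level tilt would instead down-weight outlier clients (promoting robustness).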
[9] arXiv:2503.13538 [pdf, html, other]
Title: From Demonstrations to Rewards: Alignment Without Explicit Human Preferences
Authors: Siliang Zeng, Yao Liu, Huzefa Rangwala, George Karypis, Mingyi Hong, Rasool Fakoor
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI)

One of the challenges of aligning large models with human preferences lies in both the data requirements and the technical complexities of current approaches. Predominant methods, such as RLHF, involve multiple steps, each demanding distinct types of data, including demonstration data and preference data. In RLHF, human preferences are typically modeled through a reward model, which serves as a proxy to guide policy learning during the reinforcement learning stage, ultimately producing a policy aligned with human preferences. In this paper, we propose a fresh perspective on learning alignment based on inverse reinforcement learning principles, where the optimal policy is still derived from reward maximization. However, instead of relying on preference data, we directly learn the reward model from demonstration data. This new formulation offers the flexibility to be applied even when only demonstration data is available, a capability that current RLHF methods lack, and it also shows that demonstration data offers more utility than conventional wisdom suggests. Our extensive evaluation, based on public reward benchmarks, the HuggingFace Open LLM Leaderboard, and MT-Bench, demonstrates that our approach compares favorably to state-of-the-art methods that rely solely on demonstration data.
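One concrete reading of "learning a reward model from demonstrations alone" is contrastive: the reward model should score demonstrated responses above responses sampled from the current policy, a Bradley-Terry-style objective familiar from adversarial IRL. A sketch under that assumption; the pairing scheme and interfaces are mine, not necessarily the paper's formulation:

```python
import torch
import torch.nn.functional as F

def irl_reward_step(reward_model, optimizer, demo_batch, policy_batch):
    """Push r(demonstration) above r(policy sample) for matched prompts.

    reward_model maps a batch of (prompt, response) encodings to (B,) scores.
    """
    r_demo = reward_model(demo_batch)      # (B,) scores of demonstrations
    r_policy = reward_model(policy_batch)  # (B,) scores of policy samples
    loss = -F.logsigmoid(r_demo - r_policy).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()
```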
[10] arXiv:2503.13540 [pdf, html, other]
Title: MSCMHMST: A traffic flow prediction model based on Transformer
Authors: Weiyang Geng, Yiming Pan, Zhecong Xing, Dongyu Liu, Rui Liu, Yuan Zhu
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI)

This study proposes a hybrid Transformer-based model, named MSCMHMST, aimed at addressing key challenges in traffic flow prediction. Traditional single-method approaches show limitations in traffic prediction tasks, whereas hybrid methods, by integrating the strengths of different models, can provide more accurate and robust predictions. The MSCMHMST model introduces a multi-head, multi-scale attention mechanism that allows the model to process different parts of the data in parallel and learn intrinsic representations from multiple perspectives, thereby enhancing its ability to handle complex situations. This mechanism enables the model to capture features at various scales effectively, understanding both short-term changes and long-term trends. Verified through experiments on the PeMS04/08 datasets, the MSCMHMST model demonstrated excellent robustness and accuracy in long-, medium-, and short-term traffic flow predictions. The results indicate that this model has significant potential, offering a new and effective solution for the field of traffic flow prediction.
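The abstract does not specify the block's internals; one common way to realize multi-head, multi-scale attention is to run standard attention over temporally pooled copies of the sequence and fuse the results. A PyTorch sketch under that assumption (layer sizes illustrative):

```python
import torch
import torch.nn as nn

class MultiScaleAttention(nn.Module):
    """Multi-head self-attention at several temporal scales, then fused."""
    def __init__(self, d_model=64, n_heads=4, scales=(1, 2, 4)):
        super().__init__()
        self.scales = scales
        self.attn = nn.ModuleList(
            nn.MultiheadAttention(d_model, n_heads, batch_first=True)
            for _ in scales)
        self.fuse = nn.Linear(d_model * len(scales), d_model)

    def forward(self, x):                       # x: (B, T, D)
        outs = []
        for s, attn in zip(self.scales, self.attn):
            xs = x if s == 1 else nn.functional.avg_pool1d(
                x.transpose(1, 2), s).transpose(1, 2)   # coarsen time axis
            y, _ = attn(xs, xs, xs)
            if s != 1:                           # upsample back to T steps
                y = nn.functional.interpolate(
                    y.transpose(1, 2), size=x.shape[1]).transpose(1, 2)
            outs.append(y)
        return self.fuse(torch.cat(outs, dim=-1))
```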
[11] arXiv:2503.13542 [pdf, html, other]
Title: HAR-DoReMi: Optimizing Data Mixture for Self-Supervised Human Activity Recognition Across Heterogeneous IMU Datasets
Authors: Lulu Ban, Tao Zhu, Xiangqing Lu, Qi Qiu, Wenyong Han, Shuangjian Li, Liming Chen, Kevin I-Kai Wang, Mingxing Nie, Yaping Wan
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI)

Cross-dataset Human Activity Recognition (HAR) suffers from limited model generalization, hindering its practical deployment. To address this critical challenge, and inspired by the success of DoReMi in Large Language Models (LLMs), we introduce a data mixture optimization strategy for pre-training HAR models, aiming to improve recognition performance across heterogeneous datasets. However, directly applying DoReMi to HAR encounters new challenges due to the continuous, multi-channel, and intrinsically heterogeneous characteristics of IMU sensor data. To overcome these limitations, we propose HAR-DoReMi, a novel framework that introduces a masked reconstruction task based on a Mean Squared Error (MSE) loss. By replacing the discrete language-sequence prediction task of the original DoReMi framework, which relies on the Negative Log-Likelihood (NLL) loss, the proposed framework is inherently better suited to the continuous, multi-channel characteristics of IMU data. In addition, HAR-DoReMi integrates the Mahony fusion algorithm into self-supervised HAR pre-training to mitigate the heterogeneity of varying sensor orientations. This is achieved by estimating the sensor orientation within each dataset and aligning it with a unified coordinate system, thereby improving the cross-dataset generalization ability of the HAR model. Experimental evaluation on multiple cross-dataset HAR transfer tasks demonstrates that HAR-DoReMi improves accuracy by an average of 6.51% over the current state-of-the-art method, while using only approximately 30% to 50% of the data. These results confirm the effectiveness of HAR-DoReMi in improving the generalization and data efficiency of pre-training HAR models, underscoring its significant potential to facilitate the practical deployment of HAR technology.
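The core substitution, scoring domains with a masked-reconstruction MSE instead of NLL, can be sketched as follows; the model interface is an assumption:

```python
import torch

def masked_mse_loss(model, x, mask_ratio=0.5):
    """Mask random time steps of an IMU window and reconstruct them.

    x: (B, T, C) batch of IMU windows (e.g., C = 6 accel/gyro channels).
    Only masked positions contribute to the loss, as in masked autoencoding.
    """
    B, T, C = x.shape
    mask = torch.rand(B, T, 1, device=x.device) < mask_ratio  # True = masked
    x_in = x.masked_fill(mask, 0.0)        # zero out masked steps
    x_hat = model(x_in)                    # model reconstructs (B, T, C)
    return ((x_hat - x) ** 2 * mask).sum() / (mask.sum() * C)
```

Per-domain values of this loss would then drive the DoReMi-style reweighting of the pre-training mixture.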
[12] arXiv:2503.13543 [pdf, html, other]
Title: Enhancing Visual Representation with Textual Semantics: Textual Semantics-Powered Prototypes for Heterogeneous Federated Learning
Authors: Xinghao Wu, Jianwei Niu, Xuefeng Liu, Guogang Zhu, Jiayuan Zhang, Shaojie Tang
Comments: 13 pages, 8 figures
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI)

Federated Prototype Learning (FedPL) has emerged as an effective strategy for handling data heterogeneity in Federated Learning (FL). In FedPL, clients collaboratively construct a set of global feature centers (prototypes) and align local features with these prototypes to mitigate the effects of data heterogeneity. The performance of FedPL depends strongly on the quality of the prototypes. Existing methods assume that larger inter-class distances among prototypes yield better performance, and thus design different methods to increase these distances. However, we observe that while these methods increase prototype distances to enhance class discrimination, they inevitably disrupt essential semantic relationships among classes, which are crucial for model generalization. This raises an important question: how can we construct prototypes that inherently preserve semantic relationships among classes? Directly learning these relationships from limited and heterogeneous client data is problematic in FL. Recently, the success of pre-trained language models (PLMs) has demonstrated their ability to capture semantic relationships from vast textual corpora. Motivated by this, we propose FedTSP, a novel method that leverages PLMs to construct semantically enriched prototypes from the textual modality, enabling more effective collaboration in heterogeneous data settings. We first use a large language model (LLM) to generate fine-grained textual descriptions for each class, which are then processed by a PLM on the server to form textual prototypes. To address the modality gap between client image models and the PLM, we introduce trainable prompts, allowing the prototypes to adapt better to client tasks. Extensive experiments demonstrate that FedTSP mitigates data heterogeneity while significantly accelerating convergence.
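The server-side construction can be sketched as: embed LLM-generated class descriptions with a frozen PLM and use the normalized embeddings as alignment targets for client image features. A sketch with sentence-transformers as a stand-in PLM; the library, descriptions, and loss are my assumptions, not the paper's specification:

```python
import torch
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer

# Server: build textual prototypes from (LLM-generated) class descriptions.
plm = SentenceTransformer("all-MiniLM-L6-v2")   # frozen text encoder
descriptions = {
    "cat": "a small domesticated feline with whiskers and retractable claws",
    "dog": "a domesticated canine companion animal that barks",
}
prototypes = F.normalize(
    torch.tensor(plm.encode(list(descriptions.values()))), dim=-1)

def prototype_alignment_loss(image_features, labels, prototypes, temp=0.07):
    """Clients pull image features toward their class's textual prototype."""
    feats = F.normalize(image_features, dim=-1)
    logits = feats @ prototypes.T / temp        # (B, n_classes) similarities
    return F.cross_entropy(logits, labels)
```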
[13] arXiv:2503.13544 [pdf, html, other]
Title: Semi-Decision-Focused Learning with Deep Ensembles: A Practical Framework for Robust Portfolio Optimization
Authors: Juhyeong Kim
Comments: ICLR 2025 Advances in Financial AI Workshop
Subjects: Machine Learning (cs.LG); Computational Finance (q-fin.CP); Portfolio Management (q-fin.PM)

I propose Semi-Decision-Focused Learning, a practical adaptation of Decision-Focused Learning for portfolio optimization. Rather than directly optimizing complex financial metrics, I employ simple target portfolios (Max-Sortino or One-Hot) and train models with a convex cross-entropy loss. I further incorporate Deep Ensemble methods to reduce variance and stabilize performance. Experiments on two universes (one upward-trending and another range-bound) show consistent outperformance over baseline portfolios, demonstrating the effectiveness and robustness of my approach. Code is available at https://github.com/sDFLwDE/sDFLwDE.

[14] arXiv:2503.13545 [pdf, html, other]
Title: Optimization on black-box function by parameter-shift rule
Authors: Vu Tuan Hai
Subjects: Machine Learning (cs.LG)

Machine learning has been widely applied in many areas, but training a machine learning model is increasingly difficult. A growing number of optimization problems are "black-box" problems, in which the relationship between model parameters and outcomes is uncertain or complex to trace. Optimizing black-box models that require a large number of query observations and parameters is currently difficult. To overcome the drawbacks of existing algorithms, in this study we propose a zeroth-order method originating from quantum computing, the parameter-shift rule, which uses fewer parameters than previous methods.
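For circuits built from gates whose generators have eigenvalues +/- 1/2, the parameter-shift rule gives an exact gradient from two function evaluations per parameter: df/dtheta_i = [f(theta + (pi/2) e_i) - f(theta - (pi/2) e_i)] / 2. A minimal sketch of its use as a zeroth-order optimizer, with a classical periodic function standing in for the black-box objective (the quantum-hardware details are out of scope here):

```python
import numpy as np

def parameter_shift_grad(f, theta, shift=np.pi / 2):
    """Estimate the gradient of f via the parameter-shift rule."""
    grad = np.zeros_like(theta)
    for i in range(len(theta)):
        e = np.zeros_like(theta)
        e[i] = shift
        grad[i] = (f(theta + e) - f(theta - e)) / 2.0
    return grad

# Toy usage: gradient descent on a periodic black-box objective.
f = lambda th: np.sum(np.sin(th))   # stand-in for a circuit expectation value
theta = np.array([0.3, 1.2, -0.7])
for _ in range(200):
    theta -= 0.1 * parameter_shift_grad(f, theta)
print(theta, f(theta))              # converges toward the minima of sin
```

For this sinusoidal stand-in the rule is exact: [sin(t + pi/2) - sin(t - pi/2)] / 2 = cos(t), the true derivative.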
[15] arXiv:2503.13546 [pdf, html, other]
Title: CNCast: Leveraging 3D Swin Transformer and DiT for Enhanced Regional Weather Forecasting
Authors: Hongli Liang, Yuanting Zhang, Qingye Meng, Shuangshuang He, Xingyuan Yuan (all ColorfulClouds Technology Co., Ltd)
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI)

This study introduces a cutting-edge regional weather forecasting model based on the 3D Swin Transformer architecture. The model is specifically designed to deliver precise hourly weather predictions from 1 hour to 5 days ahead, significantly improving the reliability and practicality of short-term weather forecasts. Our model demonstrated generally superior performance compared to Pangu, a well-established global model. The evaluation indicates that our model excels at predicting most weather variables, highlighting its potential as a more effective alternative for limited-area modeling. A noteworthy feature of this model is the integration of enhanced boundary conditions, inspired by traditional numerical weather prediction (NWP) techniques, which has substantially improved the model's predictive accuracy. Additionally, the model includes an innovative approach for diagnosing hourly total precipitation at a high spatial resolution of approximately 5 kilometers. This is achieved through a latent diffusion model, offering an alternative method for generating high-resolution precipitation data.
[16] arXiv:2503.13548 [pdf, other]
Title: Fuzzy Rule-based Differentiable Representation Learning
Authors: Wei Zhang, Zhaohong Deng, Guanjin Wang, Kup-Sze Choi
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI)

Representation learning has emerged as a crucial focus in machine and deep learning, involving the extraction of meaningful and useful features and patterns from input data to enhance the performance of downstream tasks such as classification, clustering, and prediction. Current mainstream representation learning methods rely primarily on non-linear data mining techniques, such as kernel methods and deep neural networks, to extract abstract knowledge from complex datasets. However, most of these methods are black-box, lacking transparency and interpretability in the learning process, which constrains their practical utility. To this end, this paper introduces a novel representation learning method grounded in an interpretable fuzzy rule-based model. Specifically, it is built upon the Takagi-Sugeno-Kang fuzzy system (TSK-FS): input data is first mapped to a high-dimensional fuzzy feature space through the antecedent part of the TSK-FS. Subsequently, a novel differentiable optimization method is proposed for learning the consequent part, preserving the model's interpretability and transparency while further exploring the nonlinear relationships within the data. This optimization method retains the essence of traditional optimization, with certain parts of the process parameterized, corresponding differentiable modules constructed, and a deep optimization process implemented. Consequently, the method not only enhances performance but also preserves interpretability. Moreover, a second-order geometry preservation method is introduced to further improve the robustness of the proposed method. Extensive experiments conducted on various benchmark datasets validate the superiority of the proposed method, highlighting its potential for advancing representation learning methodologies.
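In a TSK fuzzy system, the antecedent part maps an input to normalized rule firing strengths, typically via Gaussian membership functions. A minimal sketch of that fuzzy feature mapping; in practice the rule centers and widths would come from clustering rather than random draws:

```python
import numpy as np

def tsk_antecedent(X, centers, sigma=1.0):
    """Map inputs to normalized rule firing strengths (fuzzy features).

    X: (n, d) inputs; centers: (k, d) rule centers (e.g., from k-means).
    The firing strength of rule j is a product of Gaussian memberships,
    equivalent to one Gaussian on the squared distance to the rule center.
    """
    d2 = ((X[:, None, :] - centers[None, :, :]) ** 2).sum(-1)   # (n, k)
    fire = np.exp(-d2 / (2 * sigma**2))
    return fire / fire.sum(axis=1, keepdims=True)   # normalized firing levels

# The consequent part is then linear in these features, which is what keeps
# the model interpretable while still admitting gradient-based training.
X = np.random.randn(5, 3)
centers = np.random.randn(4, 3)
print(tsk_antecedent(X, centers).shape)             # (5, 4)
```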
[17] arXiv:2503.13550 [pdf, other]
Title: Towards Privacy-Preserving Data-Driven Education: The Potential of Federated Learning
Authors: Mohammad Khalil, Ronas Shakya, Qinyi Liu
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI); Cryptography and Security (cs.CR)

The increasing adoption of data-driven applications in education, such as learning analytics and AI in education, has raised significant privacy and data protection concerns. While these challenges have been widely discussed in previous works, practical solutions remain limited. Federated learning has recently been discussed as a promising privacy-preserving technique, yet its application in education remains scarce. This paper presents an experimental evaluation of federated learning for educational data prediction, comparing its performance to traditional non-federated approaches. Our findings indicate that federated learning achieves comparable predictive accuracy. Furthermore, under adversarial attacks, federated learning demonstrates greater resilience than non-federated settings. These results reinforce the value of federated learning as a potential approach for balancing predictive performance and privacy in educational contexts.

[18] arXiv:2503.13557 [pdf, html, other]
Title: APF+: Boosting adaptive-potential function reinforcement learning methods with a W-shaped network for high-dimensional games
Authors: Yifei Chen, Lambert Schomaker
Comments: 46 pages
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI)

Studies in reward shaping for reinforcement learning (RL) have flourished in recent years due to its ability to speed up training. Our previous work proposed an adaptive potential function (APF) and showed that APF can accelerate Q-learning with a Multi-layer Perceptron in low-dimensional domains. This paper extends APF with an encoder (APF+) for RL state representation, allowing APF to be applied to pixel-based Atari games via a state-encoding method that projects high-dimensional game pixel frames onto low-dimensional embeddings. We approach this by designing the state-representation encoder as a W-shaped network (W-Net), which encodes both the background and the moving entities in the game frames. Specifically, the embeddings derived from the pre-trained W-Net consist of two latent vectors: one represents the input state, and the other represents the deviation of the input state's representation from itself. We then incorporate W-Net into APF to train a downstream Dueling Deep Q-Network (DDQN), obtaining APF-WNet-DDQN, and demonstrate its effectiveness in Atari game-playing tasks. To evaluate the APF+W-Net module in such high-dimensional tasks, we compare against two types of baselines: (i) the basic DDQN; and (ii) two encoder-replaced APF-DDQN methods in which W-Net is replaced by (a) an unsupervised state representation method called Spatiotemporal Deep Infomax (ST-DIM) and (b) a ground-truth state representation provided by the Atari Annotated RAM Interface (ARI). The experimental results show that, out of 20 Atari games, APF-WNet-DDQN significantly outperforms DDQN (14/20 games) and APF-STDIM-DDQN (13/20 games). Compared with APF-ARI-DDQN, which directly employs embeddings of the detailed game-internal state information, APF-WNet-DDQN achieves comparable performance.
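APF builds on potential-based reward shaping, which adds F(s, s') = gamma * Phi(s') - Phi(s) to the environment reward and provably leaves the optimal policy unchanged (Ng et al., 1999); in APF the potential Phi is itself learned. A minimal sketch of the shaping step, with the W-Net encoder abstracted behind an assumed `encode` interface:

```python
import torch
import torch.nn as nn

class AdaptivePotential(nn.Module):
    """Learned potential over encoded states; Phi is updated during training."""
    def __init__(self, embed_dim=128):
        super().__init__()
        self.phi = nn.Sequential(
            nn.Linear(embed_dim, 64), nn.ReLU(), nn.Linear(64, 1))

    def shaped_reward(self, r, z, z_next, gamma=0.99):
        # Potential-based shaping: r + gamma * Phi(s') - Phi(s).
        with torch.no_grad():
            return r + gamma * self.phi(z_next).squeeze(-1) \
                     - self.phi(z).squeeze(-1)

# Assumed usage, where z = encode(frame) comes from the W-Net embeddings:
# r_shaped = apf.shaped_reward(r, encode(s), encode(s_next))
```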
[19] arXiv:2503.13559 [pdf, other]
Title: Dynamical Mode Recognition of Turbulent Flames in a Swirl-stabilized Annular Combustor by a Time-series Learning Approach
Authors: Tao Yang, Weiming Xu, Liangliang Xu, Peng Zhang
Comments: 5 pages, 3 figures
Subjects: Machine Learning (cs.LG)

Thermoacoustic instability in annular combustors, which are essential to aero engines and modern gas turbines, can severely impair operational stability and efficiency; accurately recognizing and understanding the various combustion modes is therefore a prerequisite for understanding and controlling combustion instabilities. However, the high-dimensional spatial-temporal dynamics of turbulent flames typically pose considerable challenges to mode recognition. Building on bidirectional temporal and nonlinear dimensionality-reduction models, this study introduces a two-layer bidirectional long short-term memory variational autoencoder (Bi-LSTM-VAE) to effectively recognize dynamical modes in annular combustion systems. Specifically, using 16 pressure signals from a swirl-stabilized annular combustor, the model maps complex dynamics into a low-dimensional latent space while preserving temporal dependencies and nonlinear behavior through its recurrent structure. The results show that the novel Bi-LSTM-VAE method enables a clear representation of combustion states in a two-dimensional state space. Analysis of the latent variable distributions reveals distinct patterns corresponding to a wide range of equivalence ratios and premixed fuel and air mass flow rates, offering novel insights into mode classification and transitions and highlighting the model's potential for deciphering complex thermoacoustic phenomena.
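The architecture maps 16-channel pressure time series to a low-dimensional latent through a two-layer bidirectional LSTM encoder. A compact PyTorch sketch under the usual VAE conventions; hidden sizes and the simple repeat-latent decoder are illustrative choices, not the paper's exact design:

```python
import torch
import torch.nn as nn

class BiLSTMVAE(nn.Module):
    """Bi-LSTM encoder to a 2-D latent for multi-channel pressure signals."""
    def __init__(self, n_channels=16, hidden=64, latent=2):
        super().__init__()
        self.enc = nn.LSTM(n_channels, hidden, num_layers=2,
                           bidirectional=True, batch_first=True)
        self.mu = nn.Linear(2 * hidden, latent)
        self.logvar = nn.Linear(2 * hidden, latent)
        self.dec = nn.LSTM(latent, hidden, batch_first=True)
        self.out = nn.Linear(hidden, n_channels)

    def forward(self, x):                          # x: (B, T, 16)
        _, (h_n, _) = self.enc(x)                  # h_n: (layers*2, B, hidden)
        h = torch.cat([h_n[-2], h_n[-1]], dim=-1)  # last layer, both directions
        mu, logvar = self.mu(h), self.logvar(h)
        z = mu + torch.randn_like(mu) * (0.5 * logvar).exp()  # reparameterize
        zs = z.unsqueeze(1).expand(-1, x.size(1), -1)         # repeat over time
        y, _ = self.dec(zs)
        return self.out(y), mu, logvar

def vae_loss(x_hat, x, mu, logvar):
    rec = ((x_hat - x) ** 2).mean()                # reconstruction term
    kl = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
    return rec + kl
```

After training, plotting the per-window mu values gives the two-dimensional state-space picture of combustion modes described above.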
Building on bidirectional temporal and nonlinear dimensionality-reduction models, this study introduces a two-layer bidirectional long short-term memory variational autoencoder (Bi-LSTM-VAE) to effectively recognize dynamical modes in annular combustion systems. Specifically, leveraging 16 pressure signals from a swirl-stabilized annular combustor, the model maps complex dynamics into a low-dimensional latent space while preserving temporal dependency and nonlinear behavior features through the recurrent neural network structure. The results show that the Bi-LSTM-VAE method enables a clear representation of combustion states in a two-dimensional state space. Analysis of latent variable distributions reveals distinct patterns corresponding to a wide range of equivalence ratios and premixed fuel and air mass flow rates, offering novel insights into mode classification and transitions and highlighting the model's potential for deciphering complex thermoacoustic phenomena. </p> </div> </dd> <dt> <a name='item20'>[20]</a> <a href ="/abs/2503.13570" title="Abstract" id="2503.13570"> arXiv:2503.13570 </a> [<a href="/pdf/2503.13570" title="Download PDF" id="pdf-2503.13570" aria-labelledby="pdf-2503.13570">pdf</a>, <a href="/format/2503.13570" title="Other formats" id="oth-2503.13570" aria-labelledby="oth-2503.13570">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ExChanGeAI: An End-to-End Platform and Efficient Foundation Model for Electrocardiogram Analysis and Fine-tuning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bickmann,+L">Lucas Bickmann</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Plagwitz,+L">Lucas Plagwitz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=B%C3%BCscher,+A">Antonius Büscher</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Eckardt,+L">Lars Eckardt</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Varghese,+J">Julian Varghese</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Electrocardiogram data, among the most widely available biosignals, has become increasingly valuable with the emergence of deep learning methods, providing novel insights into cardiovascular diseases and broader health conditions. However, heterogeneous electrocardiogram formats, limited access to deep learning model weights, and the intricate algorithmic steps required for effective fine-tuning on one's own disease target labels result in complex workflows. In this work, we introduce ExChanGeAI, a web-based end-to-end platform that streamlines the reading of different formats, pre-processing, visualization, and custom machine learning with local and privacy-preserving fine-tuning. ExChanGeAI runs on personal computers and scales to high-performance server environments. The platform offers state-of-the-art deep learning models for training from scratch, alongside our novel open-source electrocardiogram foundation model CardX, pre-trained on over one million electrocardiograms. Evaluations across three external validation sets, including an entirely new test set extracted from routine care, demonstrate the fine-tuning capabilities of ExChanGeAI. 
CardX outperformed the benchmark foundation model while requiring significantly fewer parameters and lower computational resources. The platform enables users to empirically determine the most suitable model for their specific tasks based on systematic validations. The code is available at <a href="https://imigitlab.uni-muenster.de/published/exchangeai" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item21'>[21]</a> <a href ="/abs/2503.13575" title="Abstract" id="2503.13575"> arXiv:2503.13575 </a> [<a href="/pdf/2503.13575" title="Download PDF" id="pdf-2503.13575" aria-labelledby="pdf-2503.13575">pdf</a>, <a href="https://arxiv.org/html/2503.13575v1" title="View HTML" id="html-2503.13575" aria-labelledby="html-2503.13575" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13575" title="Other formats" id="oth-2503.13575" aria-labelledby="oth-2503.13575">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Analytic Subspace Routing: How Recursive Least Squares Works in Continual Learning of Large Language Model </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tong,+K">Kai Tong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+K">Kang Pan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+X">Xiao Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Meng,+E">Erli Meng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+R">Run He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cui,+Y">Yawen Cui</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+N">Nuoyan Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhuang,+H">Huiping Zhuang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL) </div> <p class='mathjax'> Large Language Models (LLMs) possess broad capabilities for processing diverse language-related tasks. However, fine-tuning an LLM diminishes these general skills, and continual fine-tuning further causes severe degradation of accumulated knowledge. Recently, Continual Learning (CL) for LLMs has emerged, which aims to continually adapt LLMs to new tasks while maintaining previously learned knowledge and inheriting general skills. Existing techniques either leverage previous data for replay, incurring extra computational costs, or utilize a single parameter-efficient module to learn the downstream task, constraining new knowledge absorption through interference between different tasks. To address these issues, this paper proposes Analytic Subspace Routing (ASR). For each task, we isolate the learning within a subspace of deep layers' features via low-rank adaptation, eliminating knowledge interference between different tasks. Additionally, we propose an analytic routing mechanism to properly utilize knowledge learned in different subspaces. 
Our approach employs Recursive Least Squares to train a multi-task router model, allowing the router to dynamically adapt to incoming data without requiring access to historical data. Also, the router effectively assigns the current task to an appropriate subspace and has a non-forgetting property of previously learned tasks with a solid theoretical guarantee. Experimental results demonstrate that our method achieves near-perfect retention of prior knowledge while seamlessly integrating new information, effectively overcoming the core limitations of existing methods. Our code will be released after acceptance. </p> </div> </dd> <dt> <a name='item22'>[22]</a> <a href ="/abs/2503.13582" title="Abstract" id="2503.13582"> arXiv:2503.13582 </a> [<a href="/pdf/2503.13582" title="Download PDF" id="pdf-2503.13582" aria-labelledby="pdf-2503.13582">pdf</a>, <a href="https://arxiv.org/html/2503.13582v1" title="View HTML" id="html-2503.13582" aria-labelledby="html-2503.13582" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13582" title="Other formats" id="oth-2503.13582" aria-labelledby="oth-2503.13582">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Spectrally-Corrected and Regularized QDA Classifier for Spiked Covariance Model </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+W">Wenya Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+H">Hua Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bai,+Z">Zhidong Bai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zhijun Liu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Statistics Theory (math.ST) </div> <p class='mathjax'> Quadratic discriminant analysis (QDA) is a widely used method for classification problems, particularly preferable over Linear Discriminant Analysis (LDA) for heterogeneous data. However, QDA loses its effectiveness in high-dimensional settings, where the data dimension and sample size tend to infinity. To address this issue, we propose a novel QDA method utilizing spectral correction and regularization techniques, termed SR-QDA. The regularization parameters in our method are selected by maximizing the Fisher-discriminant ratio. We compare SR-QDA with QDA, regularized quadratic discriminant analysis (R-QDA), and several other competitors. The results indicate that SR-QDA performs exceptionally well, especially in moderate and high-dimensional situations. Empirical experiments across diverse datasets further support this conclusion. 
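To make the spectral-correction idea concrete, here is a minimal sketch (illustrative only; the paper selects its regularization parameters by maximizing the Fisher-discriminant ratio, which is omitted here, and the function names and defaults are ours): <pre>
# Sketch: spectrally-corrected, regularized class covariance for QDA under
# a spiked model -- keep the top-k eigenvalues, flatten the bulk to its
# mean, and add a ridge term. k and gamma are placeholder values.
import numpy as np

def corrected_cov(X, k=5, gamma=0.1):
    S = np.cov(X, rowvar=False)
    vals, vecs = np.linalg.eigh(S)           # ascending eigenvalues
    vals, vecs = vals[::-1], vecs[:, ::-1]   # sort descending
    vals[k:] = vals[k:].mean()               # spectral correction of the bulk
    return (vecs * vals) @ vecs.T + gamma * np.eye(S.shape[0])

def qda_score(x, mu, S, prior):
    # quadratic discriminant score; classify by the largest score
    d = x - mu
    _, logdet = np.linalg.slogdet(S)
    return -0.5 * logdet - 0.5 * d @ np.linalg.solve(S, d) + np.log(prior)
</pre>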
</p> </div> </dd> <dt> <a name='item23'>[23]</a> <a href ="/abs/2503.13623" title="Abstract" id="2503.13623"> arXiv:2503.13623 </a> [<a href="/pdf/2503.13623" title="Download PDF" id="pdf-2503.13623" aria-labelledby="pdf-2503.13623">pdf</a>, <a href="https://arxiv.org/html/2503.13623v1" title="View HTML" id="html-2503.13623" aria-labelledby="html-2503.13623" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13623" title="Other formats" id="oth-2503.13623" aria-labelledby="oth-2503.13623">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Convex formulation for linear discriminant analysis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Surineela,+S+V+K">Sai Vijay Kumar Surineela</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kanakamalla,+P">Prathyusha Kanakamalla</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Harikumar,+H">Harigovind Harikumar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ghosh,+T">Tomojit Ghosh</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 29 pages including references, six figures, seven tables. Submitted to an Elsevier journal </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> We present a supervised dimensionality reduction technique called Convex Linear Discriminant Analysis (ConvexLDA). The proposed model optimizes a multi-objective cost function by balancing two complementary terms. The first term pulls the samples of a class towards its centroid by minimizing each sample's distance from its class centroid in the low-dimensional space. The second term pushes the classes far apart by maximizing their hyperellipsoid scattering volume via the logarithm of the determinant (\textit{log det}) of the outer product matrix formed by the low-dimensional class centroids. Using the negative of the \textit{log det}, we pose the final cost as a minimization problem, which balances the two terms using a hyper-parameter $\lambda$. We demonstrate that the cost function is convex. Unlike Fisher LDA, the proposed method does not require computing a matrix inverse, thus avoiding ill-conditioning when the data dimension is very high, e.g., in RNA-seq data. ConvexLDA does not require pairwise distance calculations, making it faster and more easily scalable. Moreover, the convex nature of the cost function ensures global optimality, enhancing the reliability of the learned embedding. Our experimental evaluation demonstrates that ConvexLDA outperforms several popular linear discriminant analysis (LDA)-based methods on a range of high-dimensional biological and image data sets. 
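The two-term cost described above is simple enough to state in a few lines; the following PyTorch-style sketch is our paraphrase of it (the projection parameterization and constants are assumptions, not the authors' code): <pre>
# Sketch of a ConvexLDA-style objective: a pull term (samples toward class
# centroids in the projected space) plus -lambda * log det of the Gram
# matrix of low-dimensional centroids, which rewards scattered classes.
import torch

def convex_lda_loss(A, X, y, num_classes, lam=1.0, eps=1e-6):
    Z = X @ A.T                                        # project to low dim
    C = torch.stack([Z[y == c].mean(0) for c in range(num_classes)])
    pull = ((Z - C[y]) ** 2).sum(1).mean()             # pull toward centroids
    G = C @ C.T                                        # centroid outer products
    push = -torch.logdet(G + eps * torch.eye(num_classes))
    return pull + lam * push                           # minimize both terms
</pre>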
</p> </div> </dd> <dt> <a name='item24'>[24]</a> <a href ="/abs/2503.13695" title="Abstract" id="2503.13695"> arXiv:2503.13695 </a> [<a href="/pdf/2503.13695" title="Download PDF" id="pdf-2503.13695" aria-labelledby="pdf-2503.13695">pdf</a>, <a href="https://arxiv.org/html/2503.13695v1" title="View HTML" id="html-2503.13695" aria-labelledby="html-2503.13695" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13695" title="Other formats" id="oth-2503.13695" aria-labelledby="oth-2503.13695">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Mitigating Spectral Bias in Neural Operators via High-Frequency Scaling for Physical Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Khodakarami,+S">Siavash Khodakarami</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Oommen,+V">Vivek Oommen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bora,+A">Aniruddha Bora</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Karniadakis,+G+E">George Em Karniadakis</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computational Physics (physics.comp-ph) </div> <p class='mathjax'> Neural operators have emerged as powerful surrogates for modeling complex physical problems. However, they suffer from spectral bias making them oblivious to high-frequency modes, which are present in multiscale physical systems. Therefore, they tend to produce over-smoothed solutions, which is particularly problematic in modeling turbulence and for systems with intricate patterns and sharp gradients such as multi-phase flow systems. In this work, we introduce a new approach named high-frequency scaling (HFS) to mitigate spectral bias in convolutional-based neural operators. By integrating HFS with proper variants of UNet neural operators, we demonstrate a higher prediction accuracy by mitigating spectral bias in single and two-phase flow problems. Unlike Fourier-based techniques, HFS is directly applied to the latent space, thus eliminating the computational cost associated with the Fourier transform. Additionally, we investigate alternative spectral bias mitigation through diffusion models conditioned on neural operators. While the diffusion model integrated with the standard neural operator may still suffer from significant errors, these errors are substantially reduced when the diffusion model is integrated with a HFS-enhanced neural operator. 
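The abstract does not spell out the HFS operator, so the following is only one plausible reading: a latent-space sharpening step that amplifies the residual between features and their local average, requiring no Fourier transform. Treat the form and the alpha factor as assumptions, not the paper's definition. <pre>
# Hypothetical latent-space high-frequency boost (unsharp masking on
# feature maps); the paper's actual HFS operator may differ.
import torch.nn.functional as F

def high_frequency_scale(h, alpha=1.5, k=3):
    # h: (B, C, H, W) latent features from the neural operator
    low = F.avg_pool2d(h, k, stride=1, padding=k // 2)  # smooth component
    return low + alpha * (h - low)                      # amplified residual
</pre>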
</p> </div> </dd> <dt> <a name='item25'>[25]</a> <a href ="/abs/2503.13709" title="Abstract" id="2503.13709"> arXiv:2503.13709 </a> [<a href="/pdf/2503.13709" title="Download PDF" id="pdf-2503.13709" aria-labelledby="pdf-2503.13709">pdf</a>, <a href="https://arxiv.org/html/2503.13709v1" title="View HTML" id="html-2503.13709" aria-labelledby="html-2503.13709" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13709" title="Other formats" id="oth-2503.13709" aria-labelledby="oth-2503.13709">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi-modal Time Series Analysis: A Tutorial and Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+Y">Yushan Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ning,+K">Kanghui Ning</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+Z">Zijie Pan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shen,+X">Xuyang Shen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ni,+J">Jingchao Ni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+W">Wenchao Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schneider,+A">Anderson Schneider</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+H">Haifeng Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nevmyvaka,+Y">Yuriy Nevmyvaka</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+D">Dongjin Song</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Multi-modal time series analysis has recently emerged as a prominent research area in data mining, driven by the increasing availability of diverse data modalities, such as text, images, and structured tabular data from real-world sources. However, effective analysis of multi-modal time series is hindered by data heterogeneity, modality gap, misalignment, and inherent noise. Recent advancements in multi-modal time series methods have exploited the multi-modal context via cross-modal interactions based on deep learning methods, significantly enhancing various downstream tasks. In this tutorial and survey, we present a systematic and up-to-date overview of multi-modal time series datasets and methods. We first state the existing challenges of multi-modal time series analysis and our motivations, with a brief introduction of preliminaries. Then, we summarize the general pipeline and categorize existing methods through a unified cross-modal interaction framework encompassing fusion, alignment, and transference at different levels (\textit{i.e.}, input, intermediate, output), where key concepts and ideas are highlighted. We also discuss the real-world applications of multi-modal analysis for both standard and spatial time series, tailored to general and specific domains. Finally, we discuss future research directions to help practitioners explore and exploit multi-modal time series. 
The up-to-date resources are provided in the GitHub repository: <a href="https://github.com/UConn-DSIS/Multi-modal-Time-Series-Analysis" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item26'>[26]</a> <a href ="/abs/2503.13763" title="Abstract" id="2503.13763"> arXiv:2503.13763 </a> [<a href="/pdf/2503.13763" title="Download PDF" id="pdf-2503.13763" aria-labelledby="pdf-2503.13763">pdf</a>, <a href="https://arxiv.org/html/2503.13763v1" title="View HTML" id="html-2503.13763" aria-labelledby="html-2503.13763" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13763" title="Other formats" id="oth-2503.13763" aria-labelledby="oth-2503.13763">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Neural Edge Histogram Descriptors for Underwater Acoustic Target Recognition </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Agashe,+A">Atharva Agashe</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Carreiro,+D">Davelle Carreiro</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Van+Dine,+A">Alexandra Van Dine</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peeples,+J">Joshua Peeples</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 6 pages, 5 figures. This work has been accepted to IEEE OCEANS 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Sound (cs.SD); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> Numerous maritime applications rely on the ability to recognize acoustic targets using passive sonar. While there is a growing reliance on pre-trained models for classification tasks, these models often require extensive computational resources and may not perform optimally when transferred to new domains due to dataset variations. To address these challenges, this work adapts the neural edge histogram descriptors (NEHD) method originally developed for image classification, to classify passive sonar signals. We conduct a comprehensive evaluation of statistical and structural texture features, demonstrating that their combination achieves competitive performance with large pre-trained models. The proposed NEHD-based approach offers a lightweight and efficient solution for underwater target recognition, significantly reducing computational costs while maintaining accuracy. 
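As a rough illustration of what an edge-histogram feature looks like on a spectrogram input (our sketch with fixed Sobel filters; NEHD additionally makes the edge kernels and the binning learnable): <pre>
# Magnitude-weighted orientation histogram of edges in a spectrogram.
import torch
import torch.nn.functional as F

def edge_histogram(spec, n_bins=8):
    # spec: (B, 1, H, W) log-spectrogram patches
    sx = torch.tensor([[-1., 0., 1.], [-2., 0., 2.], [-1., 0., 1.]])
    gx = F.conv2d(spec, sx.view(1, 1, 3, 3), padding=1)
    gy = F.conv2d(spec, sx.T.contiguous().view(1, 1, 3, 3), padding=1)
    mag = torch.sqrt(gx ** 2 + gy ** 2 + 1e-12)
    ang = torch.atan2(gy, gx)                        # orientation in (-pi, pi]
    edges = torch.linspace(-torch.pi, torch.pi, n_bins + 1)[1:-1]
    bins = torch.bucketize(ang, edges)               # orientation bin per pixel
    hist = torch.stack([(mag * (bins == b)).flatten(1).sum(1)
                        for b in range(n_bins)], dim=1)
    return hist / hist.sum(1, keepdim=True).clamp_min(1e-12)
</pre>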
</p> </div> </dd> <dt> <a name='item27'>[27]</a> <a href ="/abs/2503.13764" title="Abstract" id="2503.13764"> arXiv:2503.13764 </a> [<a href="/pdf/2503.13764" title="Download PDF" id="pdf-2503.13764" aria-labelledby="pdf-2503.13764">pdf</a>, <a href="https://arxiv.org/html/2503.13764v1" title="View HTML" id="html-2503.13764" aria-labelledby="html-2503.13764" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13764" title="Other formats" id="oth-2503.13764" aria-labelledby="oth-2503.13764">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Effective Dimension Aware Fractional-Order Stochastic Gradient Descent for Convex Optimization Problems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Partohaghighi,+M">Mohammad Partohaghighi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Marcia,+R">Roummel Marcia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">YangQuan Chen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> IEEE L-CSS submitted </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Optimization and Control (math.OC) </div> <p class='mathjax'> Fractional-order stochastic gradient descent (FOSGD) leverages a fractional exponent to capture long-memory effects in optimization, yet its practical impact is often constrained by the difficulty of tuning and stabilizing this exponent. In this work, we introduce 2SED Fractional-Order Stochastic Gradient Descent (2SEDFOSGD), a novel method that synergistically combines the Two-Scale Effective Dimension (2SED) algorithm with FOSGD to automatically calibrate the fractional exponent in a data-driven manner. By continuously gauging model sensitivity and effective dimensionality, 2SED dynamically adjusts the exponent to curb erratic oscillations and enhance convergence rates. Theoretically, we demonstrate how this dimension-aware adaptation retains the benefits of fractional memory while averting the sluggish or unstable behaviors frequently observed in naive fractional SGD. Empirical evaluations across multiple benchmarks confirm that our 2SED-driven fractional exponent approach not only converges faster but also achieves more robust final performance, suggesting broad applicability for fractional-order methodologies in large-scale machine learning and related domains. 
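For readers unfamiliar with fractional-order updates, a common form raises the gradient magnitude to a fractional power elementwise; 2SEDFOSGD's contribution is setting that exponent adaptively from the two-scale effective dimension. The sketch below shows only a generic update of this family (the adaptation rule itself is the paper's and is not reproduced): <pre>
# Generic fractional-order SGD step; alpha = 1 recovers plain SGD.
# In 2SEDFOSGD, alpha would be recalibrated online from the 2SED estimate.
import numpy as np

def fosgd_step(theta, grad, lr=0.01, alpha=0.9):
    return theta - lr * np.sign(grad) * np.abs(grad) ** alpha
</pre>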
</p> </div> </dd> <dt> <a name='item28'>[28]</a> <a href ="/abs/2503.13766" title="Abstract" id="2503.13766"> arXiv:2503.13766 </a> [<a href="/pdf/2503.13766" title="Download PDF" id="pdf-2503.13766" aria-labelledby="pdf-2503.13766">pdf</a>, <a href="https://arxiv.org/html/2503.13766v1" title="View HTML" id="html-2503.13766" aria-labelledby="html-2503.13766" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13766" title="Other formats" id="oth-2503.13766" aria-labelledby="oth-2503.13766">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A finite-sample bound for identifying partially observed linear switched systems from a single trajectory </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Racz,+D">Daniel Racz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Petreczky,+M">Mihaly Petreczky</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Daroczy,+B">Balint Daroczy</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Systems and Control (eess.SY) </div> <p class='mathjax'> We derive a finite-sample probabilistic bound on the parameter estimation error of a system identification algorithm for Linear Switched Systems. The algorithm estimates Markov parameters from a single trajectory and applies a variant of the Ho-Kalman algorithm to recover the system matrices. Our bound guarantees statistical consistency under the assumption that the true system exhibits quadratic stability. The proof leverages the theory of weakly dependent processes. To the best of our knowledge, this is the first finite-sample bound for this algorithm in the single-trajectory setting. </p> </div> </dd> <dt> <a name='item29'>[29]</a> <a href ="/abs/2503.13795" title="Abstract" id="2503.13795"> arXiv:2503.13795 </a> [<a href="/pdf/2503.13795" title="Download PDF" id="pdf-2503.13795" aria-labelledby="pdf-2503.13795">pdf</a>, <a href="https://arxiv.org/html/2503.13795v1" title="View HTML" id="html-2503.13795" aria-labelledby="html-2503.13795" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13795" title="Other formats" id="oth-2503.13795" aria-labelledby="oth-2503.13795">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> BurTorch: Revisiting Training from First Principles by Coupling Autodiff, Math Optimization, and Systems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Burlachenko,+K">Konstantin Burlachenko</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Richt%C3%A1rik,+P">Peter Richtárik</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 46 pages, 7 figures, 19 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Mathematical Software (cs.MS) </div> <p class='mathjax'> In this work, we introduce BurTorch, a compact high-performance framework designed to optimize Deep Learning (DL) training on single-node workstations through an exceptionally efficient CPU-based backpropagation (Rumelhart et al., 1986; Linnainmaa, 1970) implementation. Although modern DL frameworks rely on compiler-like optimizations internally, BurTorch takes a different path. 
It adopts a minimalist design and demonstrates that, in these circumstances, classical compiled programming languages can play a significant role in DL research. By eliminating the overhead of large frameworks and making efficient implementation choices, BurTorch achieves orders-of-magnitude improvements in performance and memory efficiency when computing $\nabla f(x)$ on a CPU. BurTorch features a compact codebase designed to achieve two key goals simultaneously. First, it provides a user experience similar to script-based programming environments. Second, it dramatically minimizes runtime overheads. In large DL frameworks, the primary source of memory overhead for relatively small computation graphs $f(x)$ is due to feature-heavy implementations. We benchmarked BurTorch against widely used DL frameworks in their execution modes: JAX (Bradbury et al., 2018), PyTorch (Paszke et al., 2019), TensorFlow (Abadi et al., 2016); and several standalone libraries: Autograd (Maclaurin et al., 2015), Micrograd (Karpathy, 2020), Apple MLX (Hannun et al., 2023). For small compute graphs, BurTorch outperforms best-practice solutions by up to $\times 2000$ in runtime and reduces memory consumption by up to $\times 3500$. For a miniaturized GPT-3 model (Brown et al., 2020), BurTorch achieves up to a $\times 20$ speedup and reduces memory up to $\times 80$ compared to PyTorch. </p> </div> </dd> <dt> <a name='item30'>[30]</a> <a href ="/abs/2503.13798" title="Abstract" id="2503.13798"> arXiv:2503.13798 </a> [<a href="/pdf/2503.13798" title="Download PDF" id="pdf-2503.13798" aria-labelledby="pdf-2503.13798">pdf</a>, <a href="https://arxiv.org/html/2503.13798v1" title="View HTML" id="html-2503.13798" aria-labelledby="html-2503.13798" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13798" title="Other formats" id="oth-2503.13798" aria-labelledby="oth-2503.13798">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> AI-Powered Prediction of Nanoparticle Pharmacokinetics: A Multi-View Learning Approach </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Khakpour,+A">Amirhossein Khakpour</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Florescu,+L">Lucia Florescu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tilley,+R">Richard Tilley</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+H">Haibo Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Iyer,+K+S">K. Swaminathan Iyer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Carneiro,+G">Gustavo Carneiro</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The clinical translation of nanoparticle-based treatments remains limited due to the unpredictability of nanoparticle (NP) pharmacokinetics: how NPs distribute, accumulate, and clear from the body. Predicting these behaviours is challenging due to complex biological interactions and the difficulty of obtaining high-quality experimental datasets. Existing AI-driven approaches rely heavily on data-driven learning but fail to integrate crucial knowledge about NP properties and biodistribution mechanisms. 
We introduce a multi-view deep learning framework that enhances pharmacokinetic predictions by incorporating prior knowledge of key NP properties such as size and charge into a cross-attention mechanism, enabling context-aware feature selection and improving generalization despite small datasets. To further enhance prediction robustness, we employ an ensemble learning approach, combining deep learning with XGBoost (XGB) and Random Forest (RF), which significantly outperforms existing AI models. Our interpretability analysis reveals key physicochemical properties driving NP biodistribution, providing biologically meaningful insights into possible mechanisms governing NP behaviour in vivo, rather than acting as a black-box model. Furthermore, by bridging machine learning with physiologically based pharmacokinetic (PBPK) modelling, this work lays the foundation for data-efficient AI-driven drug discovery and precision nanomedicine. </p> </div> </dd> <dt> <a name='item31'>[31]</a> <a href ="/abs/2503.13842" title="Abstract" id="2503.13842"> arXiv:2503.13842 </a> [<a href="/pdf/2503.13842" title="Download PDF" id="pdf-2503.13842" aria-labelledby="pdf-2503.13842">pdf</a>, <a href="https://arxiv.org/html/2503.13842v1" title="View HTML" id="html-2503.13842" aria-labelledby="html-2503.13842" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13842" title="Other formats" id="oth-2503.13842" aria-labelledby="oth-2503.13842">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Counterfactual experience augmented off-policy reinforcement learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+S">Sunbowen Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gong,+Y">Yicheng Gong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+C">Chao Deng</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by Neurocomputing, <a href="https://doi.org/10.1016/j.neucom.2025.130017" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Machine Learning (stat.ML) </div> <p class='mathjax'> Reinforcement learning control algorithms face significant challenges due to out-of-distribution and inefficient exploration problems. While model-based reinforcement learning enhances the agent's reasoning and planning capabilities by constructing virtual environments, training such virtual environments can be very complex. In order to build an efficient inference model and enhance the representativeness of learning data, we propose the Counterfactual Experience Augmentation (CEA) algorithm. CEA leverages variational autoencoders to model the dynamic patterns of state transitions and introduces randomness to model non-stationarity. This approach focuses on expanding the learning data in the experience pool through counterfactual inference and performs exceptionally well in environments that follow the bisimulation assumption. Since environments with bisimulation properties are usually represented by discrete observation and action spaces, we propose a sampling method based on maximum kernel density estimation entropy to extend CEA to various environments. 
By providing reward signals for counterfactual state transitions based on real information, CEA constructs a complete counterfactual experience to alleviate the out-of-distribution problem of the learning data, and it outperforms general SOTA algorithms in environments with different properties. Finally, we discuss the similarities, differences, and properties of the generated counterfactual experiences and real experiences. The code is available at <a href="https://github.com/Aegis1863/CEA" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item32'>[32]</a> <a href ="/abs/2503.13868" title="Abstract" id="2503.13868"> arXiv:2503.13868 </a> [<a href="/pdf/2503.13868" title="Download PDF" id="pdf-2503.13868" aria-labelledby="pdf-2503.13868">pdf</a>, <a href="https://arxiv.org/html/2503.13868v1" title="View HTML" id="html-2503.13868" aria-labelledby="html-2503.13868" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13868" title="Other formats" id="oth-2503.13868" aria-labelledby="oth-2503.13868">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Out-of-Distribution Generalization in Time Series: A Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+X">Xin Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Teng,+F">Fei Teng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xingwang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+J">Ji Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+T">Tianrui Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Duan,+Q">Qiang Duan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 20 pages, 8 figures, 5 tables. Work in Progress </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Time series frequently manifest distribution shifts, diverse latent features, and non-stationary learning dynamics, particularly in open and evolving environments. These characteristics pose significant challenges for out-of-distribution (OOD) generalization. While substantial progress has been made, a systematic synthesis of advancements remains lacking. To address this gap, we present the first comprehensive review of OOD generalization methodologies for time series, organized to delineate the field's evolutionary trajectory and contemporary research landscape. We organize our analysis across three foundational dimensions: data distribution, representation learning, and OOD evaluation. For each dimension, we present several popular algorithms in detail. Furthermore, we highlight key application scenarios, emphasizing their real-world impact. Finally, we identify persistent challenges and propose future research directions. A detailed summary of the methods reviewed for the generalization of OOD in time series can be accessed at <a href="https://tsood-generalization.com" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item33'>[33]</a> <a href ="/abs/2503.13872" title="Abstract" id="2503.13872"> arXiv:2503.13872 </a> [<a href="/pdf/2503.13872" title="Download PDF" id="pdf-2503.13872" aria-labelledby="pdf-2503.13872">pdf</a>, <a href="/format/2503.13872" title="Other formats" id="oth-2503.13872" aria-labelledby="oth-2503.13872">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Empirical Calibration and Metric Differential Privacy in Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Faustini,+P">Pedro Faustini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fernandes,+N">Natasha Fernandes</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=McIver,+A">Annabelle McIver</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dras,+M">Mark Dras</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Cryptography and Security (cs.CR) </div> <p class='mathjax'> NLP models trained with differential privacy (DP) usually adopt the DP-SGD framework, and privacy guarantees are often reported in terms of the privacy budget $\epsilon$. However, $\epsilon$ does not have any intrinsic meaning, and it is generally not possible to compare across variants of the framework. Work in image processing has therefore explored how to empirically calibrate noise across frameworks using Membership Inference Attacks (MIAs). However, this kind of calibration has not been established for NLP. In this paper, we show that MIAs offer little help in calibrating privacy, whereas reconstruction attacks are more useful. As a use case, we define a novel kind of directional privacy based on the von Mises-Fisher (VMF) distribution, a metric DP mechanism that perturbs angular distance rather than adding (isotropic) Gaussian noise, and apply this to NLP architectures. We show that, even though formal guarantees are incomparable, empirical privacy calibration reveals that each mechanism has different areas of strength with respect to utility-privacy trade-offs. 
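To illustrate the mechanism's flavor: instead of adding Gaussian noise to an embedding, one samples a nearby direction from a von Mises-Fisher distribution centered on it. A minimal sketch, assuming SciPy >= 1.11 for the sampler (the paper's calibration of the noise is not shown): <pre>
# Perturb the *direction* of an embedding with VMF noise; larger kappa
# means the released vector stays closer to the original direction.
import numpy as np
from scipy.stats import vonmises_fisher

def vmf_perturb(embedding, kappa=50.0, seed=None):
    mu = embedding / np.linalg.norm(embedding)   # unit mean direction
    return vonmises_fisher(mu, kappa).rvs(1, random_state=seed)[0]
</pre>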
</p> </div> </dd> <dt> <a name='item34'>[34]</a> <a href ="/abs/2503.13874" title="Abstract" id="2503.13874"> arXiv:2503.13874 </a> [<a href="/pdf/2503.13874" title="Download PDF" id="pdf-2503.13874" aria-labelledby="pdf-2503.13874">pdf</a>, <a href="https://arxiv.org/html/2503.13874v1" title="View HTML" id="html-2503.13874" aria-labelledby="html-2503.13874" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13874" title="Other formats" id="oth-2503.13874" aria-labelledby="oth-2503.13874">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi-label feature selection based on binary hashing learning and dynamic graph constraints </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+C">Cong Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+C">Changqin Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+W">Wenhua Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+X">Xiaodi Huang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 21 pages,19 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Multi-label learning poses significant challenges in extracting reliable supervisory signals from the label space. Existing approaches often employ continuous pseudo-labels to replace binary labels, improving supervisory information representation. However, these methods can introduce noise from irrelevant labels and lead to unreliable graph structures. To overcome these limitations, this study introduces a novel multi-label feature selection method called Binary Hashing and Dynamic Graph Constraint (BHDG), the first method to integrate binary hashing into multi-label learning. BHDG utilizes low-dimensional binary hashing codes as pseudo-labels to reduce noise and improve representation robustness. A dynamically constrained sample projection space is constructed based on the graph structure of these binary pseudo-labels, enhancing the reliability of the dynamic graph. To further enhance pseudo-label quality, BHDG incorporates label graph constraints and inner product minimization within the sample space. Additionally, an $l_{2,1}$-norm regularization term is added to the objective function to facilitate the feature selection process. The augmented Lagrangian multiplier (ALM) method is employed to optimize binary variables effectively. Comprehensive experiments on 10 benchmark datasets demonstrate that BHDG outperforms ten state-of-the-art methods across six evaluation metrics. BHDG achieves the highest overall performance ranking, surpassing the next-best method by an average of at least 2.7 ranks per metric, underscoring its effectiveness and robustness in multi-label feature selection. 
</p> </div> </dd> <dt> <a name='item35'>[35]</a> <a href ="/abs/2503.13882" title="Abstract" id="2503.13882"> arXiv:2503.13882 </a> [<a href="/pdf/2503.13882" title="Download PDF" id="pdf-2503.13882" aria-labelledby="pdf-2503.13882">pdf</a>, <a href="https://arxiv.org/html/2503.13882v1" title="View HTML" id="html-2503.13882" aria-labelledby="html-2503.13882" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13882" title="Other formats" id="oth-2503.13882" aria-labelledby="oth-2503.13882">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MoK-RAG: Mixture of Knowledge Paths Enhanced Retrieval-Augmented Generation for Embodied AI Environments </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+Z">Zhengsheng Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+L">Linwei Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+X">Xinyang Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bai,+X">Xuefeng Bai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+K">Kehai Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+M">Min Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> While human cognition inherently retrieves information from diverse and specialized knowledge sources during decision-making processes, current Retrieval-Augmented Generation (RAG) systems typically operate through single-source knowledge retrieval, leading to a cognitive-algorithmic discrepancy. To bridge this gap, we introduce MoK-RAG, a novel multi-source RAG framework that implements a mixture of knowledge paths enhanced retrieval mechanism through functional partitioning of a large language model (LLM) corpus into distinct sections, enabling retrieval from multiple specialized knowledge paths. Applied to the generation of 3D simulated environments, our proposed MoK-RAG3D enhances this paradigm by partitioning 3D assets into distinct sections and organizing them based on a hierarchical knowledge tree structure. Different from previous methods that only use manual evaluation, we pioneered the introduction of automated evaluation methods for 3D scenes. Both automatic and human evaluations in our experiments demonstrate that MoK-RAG3D can assist Embodied AI agents in generating diverse scenes. 
</p> </div> </dd> <dt> <a name='item36'>[36]</a> <a href ="/abs/2503.13899" title="Abstract" id="2503.13899"> arXiv:2503.13899 </a> [<a href="/pdf/2503.13899" title="Download PDF" id="pdf-2503.13899" aria-labelledby="pdf-2503.13899">pdf</a>, <a href="https://arxiv.org/html/2503.13899v1" title="View HTML" id="html-2503.13899" aria-labelledby="html-2503.13899" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13899" title="Other formats" id="oth-2503.13899" aria-labelledby="oth-2503.13899">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning local neighborhoods of non-Gaussian graphical models: A measure transport approach </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liaw,+S">Sarah Liaw</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Morrison,+R">Rebecca Morrison</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Marzouk,+Y">Youssef Marzouk</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Baptista,+R">Ricardo Baptista</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted in AAAI 2025: 23 pages, 9 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computation (stat.CO) </div> <p class='mathjax'> Identifying the Markov properties or conditional independencies of a collection of random variables is a fundamental task in statistics for modeling and inference. Existing approaches often learn the structure of a probabilistic graphical model, which encodes these dependencies, by assuming that the variables follow a distribution with a simple parametric form. Moreover, the computational cost of many algorithms scales poorly for high-dimensional distributions, as they need to estimate all the edges in the graph simultaneously. In this work, we propose a scalable algorithm to infer the conditional independence relationships of each variable by exploiting the local Markov property. The proposed method, named Localized Sparsity Identification for Non-Gaussian Distributions (L-SING), estimates the graph by using flexible classes of transport maps to represent the conditional distribution for each variable. We show that L-SING includes existing approaches, such as neighborhood selection with Lasso, as a special case. We demonstrate the effectiveness of our algorithm in both Gaussian and non-Gaussian settings by comparing it to existing methods. Lastly, we show the scalability of the proposed approach by applying it to high-dimensional non-Gaussian examples, including a biological dataset with more than 150 variables. 
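The neighborhood-selection-with-Lasso special case mentioned above is easy to sketch: regress each variable on all the others and read edges off the nonzero coefficients (a standard construction; the hyperparameters below are placeholders): <pre>
# Meinshausen-Buhlmann-style neighborhood selection with the Lasso.
import numpy as np
from sklearn.linear_model import Lasso

def lasso_neighborhoods(X, alpha=0.1, tol=1e-8):
    n, p = X.shape
    adj = np.zeros((p, p), dtype=bool)
    for j in range(p):                       # one sparse regression per node
        others = np.delete(np.arange(p), j)
        coef = Lasso(alpha=alpha).fit(X[:, others], X[:, j]).coef_
        adj[j, others] = np.abs(coef) > tol
    return adj | adj.T                       # symmetrize with the OR rule
</pre>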
</p> </div> </dd> <dt> <a name='item37'>[37]</a> <a href ="/abs/2503.13909" title="Abstract" id="2503.13909"> arXiv:2503.13909 </a> [<a href="/pdf/2503.13909" title="Download PDF" id="pdf-2503.13909" aria-labelledby="pdf-2503.13909">pdf</a>, <a href="https://arxiv.org/html/2503.13909v1" title="View HTML" id="html-2503.13909" aria-labelledby="html-2503.13909" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13909" title="Other formats" id="oth-2503.13909" aria-labelledby="oth-2503.13909">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Quantification of Uncertainties in Probabilistic Deep Neural Network by Implementing Boosting of Variational Inference </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bera,+P">Pavia Bera</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bhanja,+S">Sanjukta Bhanja</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> Modern neural network architectures have achieved remarkable accuracies but remain highly dependent on their training data, often lacking interpretability in their learned mappings. While effective on large datasets, they tend to overfit on smaller ones. Probabilistic neural networks, such as those utilizing variational inference, address this limitation by incorporating uncertainty estimation through weight distributions rather than point estimates. However, standard variational inference often relies on a single-density approximation, which can lead to poor posterior estimates and hinder model performance. We propose Boosted Bayesian Neural Networks (BBNN), a novel approach that enhances neural network weight distribution approximations using Boosting Variational Inference (BVI). By iteratively constructing a mixture of densities, BVI expands the approximating family, enabling a more expressive posterior that leads to improved generalization and uncertainty estimation. While this approach increases computational complexity, it significantly enhances accuracy, an essential tradeoff in high-stakes applications such as medical diagnostics, where false negatives can have severe consequences. Our experimental results demonstrate that BBNN achieves ~5% higher accuracy compared to conventional neural networks while providing superior uncertainty quantification. This improvement highlights the effectiveness of leveraging a mixture-based variational family to better approximate the posterior distribution, ultimately advancing probabilistic deep learning. 
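Once BVI has produced a mixture over network weights, prediction and uncertainty follow by averaging over the mixture; the sketch below shows only that final step (interfaces such as `forward` are our placeholders, not the paper's code): <pre>
# Predictive mean and variance under a mixture-of-Gaussians weight
# posterior: pick a component, sample weights, average the predictions.
import numpy as np

def mixture_predict(x, components, weights, forward, n_samples=100):
    rng = np.random.default_rng()
    preds = []
    for _ in range(n_samples):
        k = rng.choice(len(components), p=weights)  # pick mixture component
        mean, std = components[k]
        preds.append(forward(rng.normal(mean, std), x))
    preds = np.stack(preds)
    return preds.mean(0), preds.var(0)              # prediction, uncertainty
</pre>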
</p> </div> </dd> <dt> <a name='item38'>[38]</a> <a href ="/abs/2503.13911" title="Abstract" id="2503.13911"> arXiv:2503.13911 </a> [<a href="/pdf/2503.13911" title="Download PDF" id="pdf-2503.13911" aria-labelledby="pdf-2503.13911">pdf</a>, <a href="https://arxiv.org/html/2503.13911v1" title="View HTML" id="html-2503.13911" aria-labelledby="html-2503.13911" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13911" title="Other formats" id="oth-2503.13911" aria-labelledby="oth-2503.13911">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Incorporating Attributes and Multi-Scale Structures for Heterogeneous Graph Contrastive Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+R">Ruobing Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yacong Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+H">Haobing Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+Y">Yanwei Yu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Social and Information Networks (cs.SI) </div> <p class='mathjax'> Heterogeneous graphs (HGs) are composed of multiple types of nodes and edges, making them more effective at capturing the complex relational structures inherent in the real world. However, in real-world scenarios, labeled data is often difficult to obtain, which limits the applicability of semi-supervised approaches. Self-supervised learning aims to enable models to automatically learn useful features from data, effectively addressing the challenge of limited labeled data. In this paper, we propose ASHGCL, a novel contrastive learning framework for heterogeneous graphs that incorporates three distinct views, focusing on node attributes, high-order structural information, and low-order structural information, respectively, to capture these complementary signals for node representation learning. Furthermore, we introduce an attribute-enhanced positive sample selection strategy that combines both structural information and attribute information, effectively addressing the issue of sampling bias. Extensive experiments on four real-world datasets show that ASHGCL outperforms state-of-the-art unsupervised baselines and even surpasses some supervised benchmarks. 
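A contrastive objective over such views typically pairs the same node's embeddings across views; a simplified InfoNCE sketch of that idea (our reduction of the three-view setup; the temperature and pairing scheme are assumptions): <pre>
# InfoNCE between two views of the same nodes; the diagonal entries of
# the similarity matrix are the positive pairs.
import torch
import torch.nn.functional as F

def info_nce(z1, z2, tau=0.5):
    z1, z2 = F.normalize(z1, dim=1), F.normalize(z2, dim=1)
    logits = z1 @ z2.T / tau
    targets = torch.arange(z1.size(0))
    return F.cross_entropy(logits, targets)

def three_view_loss(z_attr, z_high, z_low):
    # attribute view contrasted with high- and low-order structure views
    return (info_nce(z_attr, z_high) + info_nce(z_attr, z_low)
            + info_nce(z_high, z_low)) / 3
</pre>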
</p> </div> </dd> <dt> <a name='item39'>[39]</a> <a href ="/abs/2503.13912" title="Abstract" id="2503.13912"> arXiv:2503.13912 </a> [<a href="/pdf/2503.13912" title="Download PDF" id="pdf-2503.13912" aria-labelledby="pdf-2503.13912">pdf</a>, <a href="https://arxiv.org/html/2503.13912v1" title="View HTML" id="html-2503.13912" aria-labelledby="html-2503.13912" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13912" title="Other formats" id="oth-2503.13912" aria-labelledby="oth-2503.13912">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> KANITE: Kolmogorov-Arnold Networks for ITE estimation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Mehendale,+E">Eshan Mehendale</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Thorat,+A">Abhinav Thorat</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kolla,+R">Ravi Kolla</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pedanekar,+N">Niranjan Pedanekar</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Methodology (stat.ME) </div> <p class='mathjax'> We introduce KANITE, a framework leveraging Kolmogorov-Arnold Networks (KANs) for Individual Treatment Effect (ITE) estimation under the multiple-treatment setting in causal inference. By utilizing KANs' unique ability to learn univariate activation functions, as opposed to the linear weights learned by Multi-Layer Perceptrons (MLPs), we improve ITE estimates. The KANITE framework comprises two key architectures: 1. Integral Probability Metric (IPM) architecture: this employs an IPM loss in a specialized manner to effectively align towards ITE estimation across multiple treatments. 2. Entropy Balancing (EB) architecture: this uses sample weights learned by optimizing entropy subject to balancing the covariates across treatment groups. Extensive evaluations on benchmark datasets demonstrate that KANITE outperforms state-of-the-art algorithms in both $\epsilon_{\text{PEHE}}$ and $\epsilon_{\text{ATE}}$ metrics. Our experiments highlight the advantages of KANITE in achieving improved causal estimates, emphasizing the potential of KANs to advance causal inference methodologies across diverse application areas. 
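The KAN building block referenced here replaces an MLP's scalar weights with learnable univariate functions on each edge; a toy layer using an RBF expansion in place of the usual splines (a simplification for illustration, not KANITE's implementation): <pre>
# Each (input, output) edge applies its own learnable univariate function
# f_{i,o}; the layer output sums f_{i,o}(x_i) over the input dimensions i.
import torch
import torch.nn as nn

class KANLayer(nn.Module):
    def __init__(self, d_in, d_out, n_basis=8):
        super().__init__()
        self.register_buffer('centers', torch.linspace(-2, 2, n_basis))
        self.coef = nn.Parameter(0.1 * torch.randn(d_in, d_out, n_basis))

    def forward(self, x):                                  # x: (B, d_in)
        # RBF features of each scalar input: (B, d_in, n_basis)
        phi = torch.exp(-(x.unsqueeze(-1) - self.centers) ** 2)
        return torch.einsum('bik,iok->bo', phi, self.coef)
</pre>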
</p> </div> </dd> <dt> <a name='item40'>[40]</a> <a href ="/abs/2503.13917" title="Abstract" id="2503.13917"> arXiv:2503.13917 </a> [<a href="/pdf/2503.13917" title="Download PDF" id="pdf-2503.13917" aria-labelledby="pdf-2503.13917">pdf</a>, <a href="https://arxiv.org/html/2503.13917v1" title="View HTML" id="html-2503.13917" aria-labelledby="html-2503.13917" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13917" title="Other formats" id="oth-2503.13917" aria-labelledby="oth-2503.13917">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Robust Machine Unlearning for Quantized Neural Networks via Adaptive Gradient Reweighting with Similar Labels </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tong,+Y">Yujia Tong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yuze Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yuan,+J">Jingling Yuan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+C">Chuang Hu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 15 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Model quantization enables efficient deployment of deep neural networks on edge devices through low-bit parameter representation, yet raises critical challenges for implementing machine unlearning (MU) under data privacy regulations. Existing MU methods designed for full-precision models fail to address two fundamental limitations in quantized networks: 1) Noise amplification from label mismatch during data processing, and 2) Gradient imbalance between forgotten and retained data during training. These issues are exacerbated by quantized models' constrained parameter space and discrete optimization. We propose Q-MUL, the first dedicated unlearning framework for quantized models. Our method introduces two key innovations: 1) Similar Labels assignment replaces random labels with semantically consistent alternatives to minimize noise injection, and 2) Adaptive Gradient Reweighting dynamically aligns parameter update contributions from forgotten and retained data. Through systematic analysis of quantized model vulnerabilities, we establish theoretical foundations for these mechanisms. Extensive evaluations on benchmark datasets demonstrate Q-MUL's superiority over existing approaches. 
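The two mechanisms named in this abstract can be caricatured as follows (a loose sketch from the abstract alone; the relabeling rule and the reweighting formula are our guesses at the flavor, not Q-MUL's definitions): <pre>
# (1) Replace random forget-set labels with the model's most confusable
#     wrong class; (2) balance forget vs. retain gradient contributions.
import torch
import torch.nn.functional as F

def similar_labels(model, x_forget, y_true):
    with torch.no_grad():
        logits = model(x_forget)
        logits.scatter_(1, y_true.unsqueeze(1), float('-inf'))  # mask true class
        return logits.argmax(1)              # closest remaining class

def unlearning_loss(model, x_forget, y_sim, x_retain, y_retain):
    l_forget = F.cross_entropy(model(x_forget), y_sim)
    l_retain = F.cross_entropy(model(x_retain), y_retain)
    w = l_retain.detach() / (l_forget.detach() + 1e-8)  # adaptive weight (assumed form)
    return w * l_forget + l_retain
</pre>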
</p> </div> </dd> <dt> <a name='item41'>[41]</a> <a href ="/abs/2503.13921" title="Abstract" id="2503.13921"> arXiv:2503.13921 </a> [<a href="/pdf/2503.13921" title="Download PDF" id="pdf-2503.13921" aria-labelledby="pdf-2503.13921">pdf</a>, <a href="https://arxiv.org/html/2503.13921v1" title="View HTML" id="html-2503.13921" aria-labelledby="html-2503.13921" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13921" title="Other formats" id="oth-2503.13921" aria-labelledby="oth-2503.13921">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning Accurate Models on Incomplete Data with Minimal Imputation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhen,+C">Cheng Zhen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Aryal,+N">Nischal Aryal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Termehchy,+A">Arash Termehchy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Prayoga">Prayoga</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Biwer,+G">Garrett Biwer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Patil,+S">Sankalp Patil</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Missing data is common in real-world datasets, and imputing it in order to learn accurate machine learning (ML) models requires significant time and effort. In this paper, we demonstrate that imputing all missing values is not always necessary to achieve an accurate ML model. We introduce the concept of minimal data imputation, which ensures that accurate ML models can be trained over the imputed dataset. Implementing minimal imputation guarantees both minimal imputation effort and optimal ML models. We propose algorithms to find exact and approximate minimal imputations for various ML models. Our extensive experiments indicate that our proposed algorithms significantly reduce the time and effort required for data imputation. </p> </div> </dd> <dt> <a name='item42'>[42]</a> <a href ="/abs/2503.13925" title="Abstract" id="2503.13925"> arXiv:2503.13925 </a> [<a href="/pdf/2503.13925" title="Download PDF" id="pdf-2503.13925" aria-labelledby="pdf-2503.13925">pdf</a>, <a href="https://arxiv.org/html/2503.13925v1" title="View HTML" id="html-2503.13925" aria-labelledby="html-2503.13925" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13925" title="Other formats" id="oth-2503.13925" aria-labelledby="oth-2503.13925">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Reconstructing Cell Lineage Trees from Phenotypic Features with Metric Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kuang,+D">Da Kuang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qiu,+G">Guanwen Qiu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+J">Junhyong Kim</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> How a single fertilized cell gives rise to a complex array of specialized cell types in development is a central question in biology. 
The cells grow, divide, and acquire differentiated characteristics through poorly understood molecular processes. A key approach to studying developmental processes is to infer the tree graph of cell lineage division and differentiation histories, providing an analytical framework for dissecting individual cells' molecular decisions during replication and differentiation. Although genetically engineered lineage-tracing methods have advanced the field, they are either infeasible or ethically constrained in many organisms. In contrast, modern single-cell technologies can measure high-content molecular profiles (e.g., transcriptomes) in a wide range of biological systems. <br>Here, we introduce CellTreeQM, a novel deep learning method based on transformer architectures that learns an embedding space with geometric properties optimized for tree-graph inference. By formulating lineage reconstruction as a tree-metric learning problem, we have systematically explored supervised, weakly supervised, and unsupervised training settings and present a Lineage Reconstruction Benchmark to facilitate comprehensive evaluation of our learning method. We benchmarked the method on (1) synthetic data modeled via Brownian motion with independent noise and spurious signals and (2) lineage-resolved single-cell RNA sequencing datasets. Experimental results show that CellTreeQM recovers lineage structures with minimal supervision and limited data, offering a scalable framework for uncovering cell lineage relationships in challenging animal models. To our knowledge, this is the first method to cast cell lineage inference explicitly as a metric learning task, paving the way for future computational models aimed at uncovering the molecular dynamics of cell lineage. </p> </div> </dd> <dt> <a name='item43'>[43]</a> <a href ="/abs/2503.13942" title="Abstract" id="2503.13942"> arXiv:2503.13942 </a> [<a href="/pdf/2503.13942" title="Download PDF" id="pdf-2503.13942" aria-labelledby="pdf-2503.13942">pdf</a>, <a href="/format/2503.13942" title="Other formats" id="oth-2503.13942" aria-labelledby="oth-2503.13942">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Structured Knowledge Accumulation: An Autonomous Framework for Layer-Wise Entropy Reduction in Neural Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Quantiota,+B+M">Bouarfa Mahi Quantiota</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 16 pages, 6 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Neural and Evolutionary Computing (cs.NE) </div> <p class='mathjax'> We introduce the Structured Knowledge Accumulation (SKA) framework, which reinterprets entropy as a dynamic, layer-wise measure of knowledge alignment in neural networks. Instead of relying on traditional gradient-based optimization, SKA defines entropy in terms of knowledge vectors and their influence on decision probabilities across multiple layers. This formulation naturally leads to the emergence of activation functions such as the sigmoid as a consequence of entropy minimization. Unlike conventional backpropagation, SKA allows each layer to optimize independently by aligning its knowledge representation with changes in decision probabilities. 
As a result, total network entropy decreases in a hierarchical manner, allowing knowledge structures to evolve progressively. This approach provides a scalable, biologically plausible alternative to gradient-based learning, bridging information theory and artificial intelligence while offering promising applications in resource-constrained and parallel computing environments. </p> </div> </dd> <dt> <a name='item44'>[44]</a> <a href ="/abs/2503.13954" title="Abstract" id="2503.13954"> arXiv:2503.13954 </a> [<a href="/pdf/2503.13954" title="Download PDF" id="pdf-2503.13954" aria-labelledby="pdf-2503.13954">pdf</a>, <a href="https://arxiv.org/html/2503.13954v1" title="View HTML" id="html-2503.13954" aria-labelledby="html-2503.13954" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13954" title="Other formats" id="oth-2503.13954" aria-labelledby="oth-2503.13954">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Enhanced High-Dimensional Data Visualization through Adaptive Multi-Scale Manifold Embedding </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ni,+T">Tianhao Ni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+B">Bingjie Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yao,+Z">Zhigang Yao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> To address the dual challenges of the curse of dimensionality and the difficulty in separating intra-cluster and inter-cluster structures in high-dimensional manifold embedding, we propose an Adaptive Multi-Scale Manifold Embedding (AMSME) algorithm. By introducing ordinal distance to replace traditional Euclidean distances, we theoretically demonstrate that ordinal distance overcomes the constraints of the curse of dimensionality in high-dimensional spaces, effectively distinguishing heterogeneous samples. We design an adaptive neighborhood adjustment method to construct similarity graphs that simultaneously balance intra-cluster compactness and inter-cluster separability. Furthermore, we develop a two-stage embedding framework: the first stage achieves preliminary cluster separation while preserving connectivity between structurally similar clusters via the similarity graph, and the second stage enhances inter-cluster separation through a label-driven distance reweighting. Experimental results demonstrate that AMSME significantly preserves intra-cluster topological structures and improves inter-cluster separation on real-world datasets. Additionally, leveraging its multi-resolution analysis capability, AMSME discovers novel neuronal subtypes in the mouse lumbar dorsal root ganglion scRNA-seq dataset, with marker gene analysis revealing their distinct biological roles. 
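<br><br>A minimal sketch of an ordinal (rank-based) distance under one common construction, offered as an assumption since the paper's exact definition may differ: raw Euclidean distances are replaced by symmetrized neighbor ranks. <pre>
# Ordinal distance sketch: the distance between i and j depends only on where
# each point ranks in the other's Euclidean neighbor ordering, not on magnitudes.
import numpy as np

def ordinal_distance(X: np.ndarray) -> np.ndarray:
    D = np.linalg.norm(X[:, None, :] - X[None, :, :], axis=-1)  # Euclidean
    ranks = D.argsort(axis=1).argsort(axis=1)  # rank of j in i's ordering
    return (ranks + ranks.T) / 2.0             # symmetrized ordinal distance

X = np.random.randn(100, 50)                   # high-dimensional samples
print(ordinal_distance(X).shape)               # (100, 100)
</pre>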
</p> </div> </dd> <dt> <a name='item45'>[45]</a> <a href ="/abs/2503.13964" title="Abstract" id="2503.13964"> arXiv:2503.13964 </a> [<a href="/pdf/2503.13964" title="Download PDF" id="pdf-2503.13964" aria-labelledby="pdf-2503.13964">pdf</a>, <a href="https://arxiv.org/html/2503.13964v1" title="View HTML" id="html-2503.13964" aria-labelledby="html-2503.13964" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13964" title="Other formats" id="oth-2503.13964" aria-labelledby="oth-2503.13964">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MDocAgent: A Multi-Modal Multi-Agent Framework for Document Understanding </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Han,+S">Siwei Han</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xia,+P">Peng Xia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+R">Ruiyi Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+T">Tong Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yun Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+H">Hongtu Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yao,+H">Huaxiu Yao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Document Question Answering (DocQA) is a very common task. Existing methods using Large Language Models (LLMs) or Large Vision Language Models (LVLMs) and Retrieval Augmented Generation (RAG) often prioritize information from a single modality, failing to effectively integrate textual and visual cues. These approaches struggle with complex multi-modal reasoning, limiting their performance on real-world documents. We present MDocAgent (A Multi-Modal Multi-Agent Framework for Document Understanding), a novel RAG and multi-agent framework that leverages both text and image. Our system employs five specialized agents: a general agent, a critical agent, a text agent, an image agent and a summarizing agent. These agents engage in multi-modal context retrieval, combining their individual insights to achieve a more comprehensive understanding of the document's content. This collaborative approach enables the system to synthesize information from both textual and visual components, leading to improved accuracy in question answering. Preliminary experiments on five benchmarks, including MMLongBench and LongDocURL, demonstrate the effectiveness of MDocAgent, achieving an average improvement of 12.1% over the current state-of-the-art method. This work contributes to the development of more robust and comprehensive DocQA systems capable of handling the complexities of real-world documents containing rich textual and visual information. Our data and code are available at <a href="https://github.com/aiming-lab/MDocAgent" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
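<br><br>A structural sketch of the five-agent pipeline; call_llm is a hypothetical stand-in for any chat-model client, and the prompts and wiring below are illustrative assumptions, not the authors' implementation. <pre>
# Skeleton of the agent roles described above; plug in a real LLM/LVLM client.
def call_llm(role: str, prompt: str, images=None) -> str:
    raise NotImplementedError  # hypothetical stand-in for a chat-model API

def mdocagent_answer(question, text_chunks, image_crops):
    text_view = call_llm("text agent", f"Answer from text:\n{text_chunks}\nQ: {question}")
    image_view = call_llm("image agent", f"Answer from pages. Q: {question}", images=image_crops)
    general = call_llm("general agent",
                       f"Q: {question}\nText says: {text_view}\nImages say: {image_view}")
    critique = call_llm("critical agent", f"Find flaws or conflicts in: {general}")
    return call_llm("summarizing agent",
                    f"Final answer given {general} and critique {critique}")
</pre>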
</p> </div> </dd> <dt> <a name='item46'>[46]</a> <a href ="/abs/2503.13976" title="Abstract" id="2503.13976"> arXiv:2503.13976 </a> [<a href="/pdf/2503.13976" title="Download PDF" id="pdf-2503.13976" aria-labelledby="pdf-2503.13976">pdf</a>, <a href="https://arxiv.org/html/2503.13976v1" title="View HTML" id="html-2503.13976" aria-labelledby="html-2503.13976" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13976" title="Other formats" id="oth-2503.13976" aria-labelledby="oth-2503.13976">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A CNN-based End-to-End Learning for RIS-assisted Communication System </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ginige,+N">Nipuni Ginige</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rajatheva,+N">Nandana Rajatheva</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Latva-aho,+M">Matti Latva-aho</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Reconfigurable intelligent surface (RIS) is an emerging technology used to improve system performance in beyond-5G systems. In this letter, we propose a novel convolutional neural network (CNN)-based autoencoder to jointly optimize the transmitter, the receiver, and the RIS of a RIS-assisted communication system. The proposed system jointly optimizes the sub-tasks of the transmitter, the receiver, and the RIS such as encoding/decoding, channel estimation, phase optimization, and modulation/demodulation. Numerically, we show that the bit error rate (BER) performance of the CNN-based autoencoder system is better than the theoretical BER performance of RIS-assisted communication systems. </p> </div> </dd> <dt> <a name='item47'>[47]</a> <a href ="/abs/2503.13980" title="Abstract" id="2503.13980"> arXiv:2503.13980 </a> [<a href="/pdf/2503.13980" title="Download PDF" id="pdf-2503.13980" aria-labelledby="pdf-2503.13980">pdf</a>, <a href="https://arxiv.org/html/2503.13980v1" title="View HTML" id="html-2503.13980" aria-labelledby="html-2503.13980" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13980" title="Other formats" id="oth-2503.13980" aria-labelledby="oth-2503.13980">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Empowering LLMs in Decision Games through Algorithmic Data Synthesis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Haolin Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xueyan Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Niu,+Y">Yazhe Niu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+S">Shuai Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+H">Hongsheng Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Large Language Models (LLMs) have exhibited impressive capabilities across numerous domains, yet they often struggle with complex reasoning and decision-making tasks. Decision-making games, which inherently require multifaceted reasoning logic, serve as ideal sandboxes for evaluating and enhancing the reasoning abilities of LLMs. 
In this work, we first explore whether LLMs can master complex decision-making games through targeted post-training. To this end, we design data synthesis strategies and curate extensive offline datasets from two classic games, Doudizhu and Go. We further develop a suite of techniques to effectively incorporate this data into LLM training, resulting in two novel agents: Mastermind-Dou and Mastermind-Go. Our experimental results demonstrate that these Mastermind LLMs achieve competitive performance in their respective games. Additionally, we explore whether integrating decision-making data can enhance the general reasoning abilities of LLMs. Our findings suggest that such post-training improves certain aspects of reasoning, providing valuable insights for optimizing LLM data collection and synthesis strategies. </p> </div> </dd> <dt> <a name='item48'>[48]</a> <a href ="/abs/2503.14004" title="Abstract" id="2503.14004"> arXiv:2503.14004 </a> [<a href="/pdf/2503.14004" title="Download PDF" id="pdf-2503.14004" aria-labelledby="pdf-2503.14004">pdf</a>, <a href="https://arxiv.org/html/2503.14004v1" title="View HTML" id="html-2503.14004" aria-labelledby="html-2503.14004" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14004" title="Other formats" id="oth-2503.14004" aria-labelledby="oth-2503.14004">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Predicting Human Choice Between Textually Described Lotteries </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Marantz,+E">Eyal Marantz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Plonsky,+O">Ori Plonsky</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Predicting human decision-making under risk and uncertainty is a long-standing challenge in cognitive science, economics, and AI. While prior research has focused on numerically described lotteries, real-world decisions often rely on textual descriptions. This study conducts the first large-scale exploration of human decision-making in such tasks using a large dataset of one-shot binary choices between textually described lotteries. We evaluate multiple computational approaches, including fine-tuning Large Language Models (LLMs), leveraging embeddings, and integrating behavioral theories of choice under risk. Our results show that fine-tuned LLMs, specifically RoBERTa and GPT-4o, outperform hybrid models that incorporate behavioral theory, challenging methods established in numerical settings. These findings highlight fundamental differences in how textual and numerical information influence decision-making and underscore the need for new modeling strategies to bridge this gap. 
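<br><br>A minimal sketch of the RoBERTa fine-tuning setup described above, with hypothetical dataset fields: each pair of textually described lotteries becomes one input sequence and the model predicts which option the human chose. <pre>
# Binary-choice fine-tuning sketch; the example text and label are illustrative.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tok = AutoTokenizer.from_pretrained("roberta-base")
model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2)

texts = ["Option A: a 50% chance to win a free dinner, otherwise nothing. "
         "Option B: a guaranteed small dessert."]
labels = torch.tensor([1])  # 1 = participant chose Option B (hypothetical)

batch = tok(texts, return_tensors="pt", padding=True, truncation=True)
loss = model(**batch, labels=labels).loss
loss.backward()             # one gradient step of standard fine-tuning
</pre>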
</p> </div> </dd> <dt> <a name='item49'>[49]</a> <a href ="/abs/2503.14024" title="Abstract" id="2503.14024"> arXiv:2503.14024 </a> [<a href="/pdf/2503.14024" title="Download PDF" id="pdf-2503.14024" aria-labelledby="pdf-2503.14024">pdf</a>, <a href="https://arxiv.org/html/2503.14024v1" title="View HTML" id="html-2503.14024" aria-labelledby="html-2503.14024" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14024" title="Other formats" id="oth-2503.14024" aria-labelledby="oth-2503.14024">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Uncertainty-Aware Global-View Reconstruction for Multi-View Multi-Label Feature Selection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hao,+P">Pingting Hao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+K">Kunpeng Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+W">Wanfu Gao</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages, 5 figures, accepted at AAAI 25 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> In recent years, multi-view multi-label learning (MVML) has gained popularity due to its close resemblance to real-world scenarios. However, the challenge of selecting informative features to ensure both performance and efficiency remains a significant question in MVML. Existing methods often extract information separately from the consistency part and the complementary part, which may result in noise due to unclear segmentation. In this paper, we propose a unified model constructed from the perspective of global-view reconstruction. Additionally, while feature selection methods can discern the importance of features, they typically overlook the uncertainty of samples, which is prevalent in realistic scenarios. To address this, we incorporate the perception of sample uncertainty during the reconstruction process to enhance trustworthiness. Thus, the global view is reconstructed from the graph structure between samples, sample confidence, and the view relationship. An accurate mapping is then established between the reconstructed view and the label matrix. Experimental results demonstrate the superior performance of our method on multi-view datasets. 
</p> </div> </dd> <dt> <a name='item50'>[50]</a> <a href ="/abs/2503.14043" title="Abstract" id="2503.14043"> arXiv:2503.14043 </a> [<a href="/pdf/2503.14043" title="Download PDF" id="pdf-2503.14043" aria-labelledby="pdf-2503.14043">pdf</a>, <a href="https://arxiv.org/html/2503.14043v1" title="View HTML" id="html-2503.14043" aria-labelledby="html-2503.14043" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14043" title="Other formats" id="oth-2503.14043" aria-labelledby="oth-2503.14043">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning on LLM Output Signatures for gray-box LLM Behavior Analysis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bar-Shalom,+G">Guy Bar-Shalom</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Frasca,+F">Fabrizio Frasca</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lim,+D">Derek Lim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gelberg,+Y">Yoav Gelberg</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ziser,+Y">Yftah Ziser</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=El-Yaniv,+R">Ran El-Yaniv</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chechik,+G">Gal Chechik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Maron,+H">Haggai Maron</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Large Language Models (LLMs) have achieved widespread adoption, yet our understanding of their behavior remains limited, particularly in detecting data contamination and hallucinations. While recently proposed probing techniques provide insights through activation analysis, they require "white-box" access to model internals, often unavailable. Current "gray-box" approaches typically analyze only the probability of the actual tokens in the sequence with simple task-specific heuristics. Importantly, these methods overlook the rich information contained in the full token distribution at each processing step. To address these limitations, we propose that gray-box analysis should leverage the complete observable output of LLMs, consisting of both the previously used token probabilities as well as the complete token distribution sequences - a unified data type we term LOS (LLM Output Signature). To this end, we develop a transformer-based approach to process LOS that theoretically guarantees approximation of existing techniques while enabling more nuanced analysis. Our approach achieves superior performance on hallucination and data contamination detection in gray-box settings, significantly outperforming existing baselines. Furthermore, it demonstrates strong transfer capabilities across datasets and LLMs, suggesting that LOS captures fundamental patterns in LLM behavior. Our code is available at: <a href="https://github.com/BarSGuy/LLM-Output-Signatures-Network" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
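<br><br>A sketch of assembling an LOS-style input, under the assumption that a top-k summary of each step's next-token distribution is kept alongside the emitted token's probability; shapes and k are illustrative. <pre>
# Build a per-step feature sequence from a model's output logits: the emitted
# token's probability plus a top-k summary of the full distribution at each step.
import torch

def los_features(step_logits: torch.Tensor, emitted_ids: torch.Tensor, k: int = 16):
    probs = step_logits.softmax(dim=-1)                 # (T, vocab)
    actual = probs.gather(1, emitted_ids.unsqueeze(1))  # (T, 1) prob of emitted token
    topk = probs.topk(k, dim=-1).values                 # (T, k) distribution summary
    return torch.cat([actual, topk], dim=-1)            # (T, k+1), fed to a transformer

feats = los_features(torch.randn(12, 32000), torch.randint(0, 32000, (12,)))
print(feats.shape)                                      # torch.Size([12, 17])
</pre>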
</p> </div> </dd> <dt> <a name='item51'>[51]</a> <a href ="/abs/2503.14053" title="Abstract" id="2503.14053"> arXiv:2503.14053 </a> [<a href="/pdf/2503.14053" title="Download PDF" id="pdf-2503.14053" aria-labelledby="pdf-2503.14053">pdf</a>, <a href="https://arxiv.org/html/2503.14053v1" title="View HTML" id="html-2503.14053" aria-labelledby="html-2503.14053" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14053" title="Other formats" id="oth-2503.14053" aria-labelledby="oth-2503.14053">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ON-Traffic: An Operator Learning Framework for Online Traffic Flow Estimation and Uncertainty Quantification from Lagrangian Sensors </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Rap,+J">Jake Rap</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Das,+A">Amritam Das</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Systems and Control (eess.SY) </div> <p class='mathjax'> Accurate traffic flow estimation and prediction are critical for the efficient management of transportation systems, particularly under increasing urbanization. Traditional methods relying on static sensors often suffer from limited spatial coverage, while probe vehicles provide richer, albeit sparse and irregular data. This work introduces ON-Traffic, a novel deep operator network and receding-horizon learning framework tailored for online estimation of the spatio-temporal traffic state, along with quantified uncertainty, using measurements from moving probe vehicles and downstream boundary inputs. Our framework is evaluated on both numerical and simulation datasets, showcasing its ability to handle irregular, sparse input data, adapt to time-shifted scenarios, and provide well-calibrated uncertainty estimates. The results demonstrate that the model captures complex traffic phenomena, including shockwaves and congestion propagation, while maintaining robustness to noise and sensor dropout. These advancements present a significant step toward online, adaptive traffic management systems. 
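<br><br>For readers unfamiliar with operator learning, a generic DeepONet sketch (the standard branch/trunk construction, not necessarily the paper's exact architecture): the branch net encodes probe measurements, the trunk net encodes a space-time query point, and their dot product predicts the traffic state there. <pre>
# Generic DeepONet: state(x, t) ~ branch(sensor readings) . trunk([x, t]).
import torch
import torch.nn as nn

class DeepONet(nn.Module):
    def __init__(self, n_sensors, p=64):
        super().__init__()
        self.branch = nn.Sequential(nn.Linear(n_sensors, 128), nn.Tanh(), nn.Linear(128, p))
        self.trunk = nn.Sequential(nn.Linear(2, 128), nn.Tanh(), nn.Linear(128, p))

    def forward(self, sensors, xt):   # sensors: (B, n_sensors), xt: (B, 2) = (x, t)
        return (self.branch(sensors) * self.trunk(xt)).sum(-1)  # (B,) state estimate

model = DeepONet(n_sensors=40)
print(model(torch.randn(8, 40), torch.rand(8, 2)).shape)  # torch.Size([8])
</pre>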
</p> </div> </dd> <dt> <a name='item52'>[52]</a> <a href ="/abs/2503.14076" title="Abstract" id="2503.14076"> arXiv:2503.14076 </a> [<a href="/pdf/2503.14076" title="Download PDF" id="pdf-2503.14076" aria-labelledby="pdf-2503.14076">pdf</a>, <a href="https://arxiv.org/html/2503.14076v1" title="View HTML" id="html-2503.14076" aria-labelledby="html-2503.14076" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14076" title="Other formats" id="oth-2503.14076" aria-labelledby="oth-2503.14076">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Theoretical Foundation of Flow-Based Time Series Generation: Provable Approximation, Generalization, and Efficiency </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Long,+J">Jiangxuan Long</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+Z">Zhao Song</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+C">Chiwun Yang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 33 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Recent studies suggest utilizing generative models instead of traditional auto-regressive algorithms for time series forecasting (TSF) tasks. These non-auto-regressive approaches, involving different generative methods including GAN, Diffusion, and Flow Matching for time series, have empirically demonstrated high-quality generation capability and accuracy. However, we still lack an adequate understanding of how these models approximate and generalize. This paper presents the first theoretical framework, from the perspective of flow-based generative models, to address this gap. In particular, we provide insights with strict guarantees from three perspectives: $\textbf{Approximation}$, $\textbf{Generalization}$ and $\textbf{Efficiency}$. In detail, our analysis makes the following contributions: <br>$\bullet$ Assuming a general data model, the fit of flow-based generative models is shown to converge to arbitrarily small error under the universal approximation of the Diffusion Transformer (DiT). <br>$\bullet$ By introducing a polynomial-based regularization for flow matching, the generalization error can be bounded via the generalization properties of polynomial approximation. <br>$\bullet$ Viewing sampling for generation as an optimization process, we demonstrate its fast convergence under standard first-order gradient descent updates of a suitable objective. 
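<br><br>For concreteness, the textbook conditional flow-matching objective that this line of analysis studies (the paper's polynomial-regularized variant differs): learn a velocity field so that straight-line paths from x0 to x1 carry velocity x1 - x0. <pre>
# Standard conditional flow matching loss on linear interpolation paths.
import torch

def flow_matching_loss(v_theta, x0, x1):
    t = torch.rand(x0.shape[0], 1)       # random times in [0, 1]
    xt = (1 - t) * x0 + t * x1           # point on the straight path
    target = x1 - x0                     # velocity of that path
    return ((v_theta(xt, t) - target) ** 2).mean()

v = lambda x, t: torch.zeros_like(x)     # placeholder for a learned velocity net
print(flow_matching_loss(v, torch.randn(16, 4), torch.randn(16, 4)))
</pre>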
</p> </div> </dd> <dt> <a name='item53'>[53]</a> <a href ="/abs/2503.14125" title="Abstract" id="2503.14125"> arXiv:2503.14125 </a> [<a href="/pdf/2503.14125" title="Download PDF" id="pdf-2503.14125" aria-labelledby="pdf-2503.14125">pdf</a>, <a href="https://arxiv.org/html/2503.14125v1" title="View HTML" id="html-2503.14125" aria-labelledby="html-2503.14125" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14125" title="Other formats" id="oth-2503.14125" aria-labelledby="oth-2503.14125">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Frac-Connections: Fractional Extension of Hyper-Connections </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+D">Defa Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+H">Hongzhi Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+J">Jundong Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+Z">Zihao Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+Y">Yutao Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+B">Banggu Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Min,+Q">Qiyang Min</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+X">Xun Zhou</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL) </div> <p class='mathjax'> Residual connections are central to modern deep learning architectures, enabling the training of very deep networks by mitigating gradient vanishing. Hyper-Connections recently generalized residual connections by introducing multiple connection strengths at different depths, thereby addressing the seesaw effect between gradient vanishing and representation collapse. However, Hyper-Connections increase memory access costs by expanding the width of hidden states. In this paper, we propose Frac-Connections, a novel approach that divides hidden states into multiple parts rather than expanding their width. Frac-Connections retain partial benefits of Hyper-Connections while reducing memory consumption. To validate their effectiveness, we conduct large-scale experiments on language tasks, with the largest being a 7B MoE model trained on up to 3T tokens, demonstrating that Frac-Connections significantly outperform residual connections. 
</p> </div> </dd> <dt> <a name='item54'>[54]</a> <a href ="/abs/2503.14153" title="Abstract" id="2503.14153"> arXiv:2503.14153 </a> [<a href="/pdf/2503.14153" title="Download PDF" id="pdf-2503.14153" aria-labelledby="pdf-2503.14153">pdf</a>, <a href="https://arxiv.org/html/2503.14153v1" title="View HTML" id="html-2503.14153" aria-labelledby="html-2503.14153" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14153" title="Other formats" id="oth-2503.14153" aria-labelledby="oth-2503.14153">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Speculative Decoding for Verilog: Speed and Quality, All in One </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+C">Changran Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yi Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+Y">Yunhao Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+S">Shan Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+N">Ningyi Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+Q">Qiang Xu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by the 62nd Design Automation Conference (DAC 2025) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Hardware Architecture (cs.AR); Computation and Language (cs.CL) </div> <p class='mathjax'> The rapid advancement of large language models (LLMs) has revolutionized code generation tasks across various programming languages. However, the unique characteristics of programming languages, particularly those like Verilog with specific syntax and lower representation in training datasets, pose significant challenges for conventional tokenization and decoding approaches. In this paper, we introduce a novel application of speculative decoding for Verilog code generation, showing that it can improve both inference speed and output quality, effectively achieving speed and quality all in one. Unlike standard LLM tokenization schemes, which often fragment meaningful code structures, our approach aligns decoding stops with syntactically significant tokens, making it easier for models to learn the token distribution. This refinement addresses inherent tokenization issues and enhances the model's ability to capture Verilog's logical constructs more effectively. Our experimental results show that our method achieves up to a 5.05x speedup in Verilog code generation and increases pass@10 functional accuracy on RTLLM by up to 17.19% compared to conventional training strategies. These findings highlight speculative decoding as a promising approach to bridge the quality gap in code generation for specialized programming languages. 
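<br><br>A generic speculative-decoding loop, simplified to greedy accept/reject and not the paper's Verilog-specific scheme: a small draft model proposes k tokens, the large target model verifies them, and the target's own token is kept at the first mismatch. <pre>
# target_greedy / draft_greedy: callables mapping a token list to the next token.
# In practice the target verifies all k proposals in one batched forward pass.
def speculative_step(target_greedy, draft_greedy, prefix, k=4):
    proposal = list(prefix)
    for _ in range(k):                    # draft proposes k tokens autoregressively
        proposal.append(draft_greedy(proposal))
    verified = list(prefix)
    for tok in proposal[len(prefix):]:    # target checks each proposed token
        expect = target_greedy(verified)
        verified.append(expect)           # keep the target's choice either way
        if expect != tok:                 # first mismatch: stop accepting drafts
            break
    return verified
</pre>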
</p> </div> </dd> <dt> <a name='item55'>[55]</a> <a href ="/abs/2503.14205" title="Abstract" id="2503.14205"> arXiv:2503.14205 </a> [<a href="/pdf/2503.14205" title="Download PDF" id="pdf-2503.14205" aria-labelledby="pdf-2503.14205">pdf</a>, <a href="https://arxiv.org/html/2503.14205v1" title="View HTML" id="html-2503.14205" aria-labelledby="html-2503.14205" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14205" title="Other formats" id="oth-2503.14205" aria-labelledby="oth-2503.14205">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Layer-wise Adaptive Gradient Norm Penalizing Method for Efficient and Accurate Deep Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+S">Sunwoo Lee</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published in KDD 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Sharpness-aware minimization (SAM) is known to improve the generalization performance of neural networks. However, it is not widely used in real-world applications yet due to its expensive model perturbation cost. A few variants of SAM have been proposed to tackle such an issue, but they commonly do not alleviate the cost noticeably. In this paper, we propose a lightweight layer-wise gradient norm penalizing method that tackles the expensive computational cost of SAM while maintaining its superior generalization performance. Our study empirically proves that the gradient norm of the whole model can be effectively suppressed by penalizing the gradient norm of only a few critical layers. We also theoretically show that such a partial model perturbation does not harm the convergence rate of SAM, allowing it to be safely adopted in real-world applications. To demonstrate the efficacy of the proposed method, we perform extensive experiments comparing the proposed method to mini-batch SGD and the conventional SAM using representative computer vision and language modeling benchmarks. </p> </div> </dd> <dt> <a name='item56'>[56]</a> <a href ="/abs/2503.14217" title="Abstract" id="2503.14217"> arXiv:2503.14217 </a> [<a href="/pdf/2503.14217" title="Download PDF" id="pdf-2503.14217" aria-labelledby="pdf-2503.14217">pdf</a>, <a href="https://arxiv.org/html/2503.14217v1" title="View HTML" id="html-2503.14217" aria-labelledby="html-2503.14217" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14217" title="Other formats" id="oth-2503.14217" aria-labelledby="oth-2503.14217">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Decision Tree Induction Through LLMs via Semantically-Aware Evolution </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+T">Tennison Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huynh,+N">Nicolas Huynh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=van+der+Schaar,+M">Mihaela van der Schaar</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> *Liu and Huynh contributed equally. 
Published as a conference paper at ICLR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Decision trees are a crucial class of models offering robust predictive performance and inherent interpretability across various domains, including healthcare, finance, and logistics. However, current tree induction methods often face limitations such as suboptimal solutions from greedy methods or prohibitive computational costs and limited applicability of exact optimization approaches. To address these challenges, we propose an evolutionary optimization method for decision tree induction based on genetic programming (GP). Our key innovation is the integration of semantic priors and domain-specific knowledge about the search space into the optimization algorithm. To this end, we introduce $\texttt{LLEGO}$, a framework that incorporates semantic priors into genetic search operators through the use of Large Language Models (LLMs), thereby enhancing search efficiency and targeting regions of the search space that yield decision trees with superior generalization performance. This is operationalized through novel genetic operators that work with structured natural language prompts, effectively utilizing LLMs as conditional generative models and sources of semantic knowledge. Specifically, we introduce $\textit{fitness-guided}$ crossover to exploit high-performing regions, and $\textit{diversity-guided}$ mutation for efficient global exploration of the search space. These operators are controlled by corresponding hyperparameters that enable a more nuanced balance between exploration and exploitation across the search space. Empirically, we demonstrate across various benchmarks that $\texttt{LLEGO}$ evolves superior-performing trees compared to existing tree induction methods, and exhibits significantly more efficient search performance compared to conventional GP approaches. 
</p> </div> </dd> <dt> <a name='item57'>[57]</a> <a href ="/abs/2503.14239" title="Abstract" id="2503.14239"> arXiv:2503.14239 </a> [<a href="/pdf/2503.14239" title="Download PDF" id="pdf-2503.14239" aria-labelledby="pdf-2503.14239">pdf</a>, <a href="https://arxiv.org/html/2503.14239v1" title="View HTML" id="html-2503.14239" aria-labelledby="html-2503.14239" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14239" title="Other formats" id="oth-2503.14239" aria-labelledby="oth-2503.14239">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Predicting Cardiopulmonary Exercise Testing Outcomes in Congenital Heart Disease Through Multi-modal Data Integration and Geometric Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Alkan,+M">Muhammet Alkan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Veldtman,+G">Gruschen Veldtman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deligianni,+F">Fani Deligianni</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> preprint for Scientific Reports </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Cardiopulmonary exercise testing (CPET) provides a comprehensive assessment of functional capacity by measuring key physiological variables including oxygen consumption ($VO_2$), carbon dioxide production ($VCO_2$), and pulmonary ventilation ($VE$) during exercise. Previous research has established that parameters such as peak $VO_2$ and $VE/VCO_2$ ratio serve as robust predictors of mortality risk in chronic heart failure patients. In this study, we leverage CPET variables as surrogate mortality endpoints for patients with Congenital Heart Disease (CHD). To our knowledge, this represents the first successful implementation of an advanced machine learning approach that predicts CPET outcomes by integrating electrocardiograms (ECGs) with information derived from clinical letters. Our methodology began with extracting unstructured patient information-including intervention history, diagnoses, and medication regimens-from clinical letters using natural language processing techniques, organizing this data into a structured database. We then digitized ECGs to obtain quantifiable waveforms and established comprehensive data linkages. The core innovation of our approach lies in exploiting the Riemannian geometric properties of covariance matrices derived from both 12-lead ECGs and clinical text data to develop robust regression and classification models. Through extensive ablation studies, we demonstrated that the integration of ECG signals with clinical documentation, enhanced by covariance augmentation techniques in Riemannian space, consistently produced superior predictive performance compared to conventional approaches. 
</p> </div> </dd> <dt> <a name='item58'>[58]</a> <a href ="/abs/2503.14240" title="Abstract" id="2503.14240"> arXiv:2503.14240 </a> [<a href="/pdf/2503.14240" title="Download PDF" id="pdf-2503.14240" aria-labelledby="pdf-2503.14240">pdf</a>, <a href="https://arxiv.org/html/2503.14240v1" title="View HTML" id="html-2503.14240" aria-labelledby="html-2503.14240" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14240" title="Other formats" id="oth-2503.14240" aria-labelledby="oth-2503.14240">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Persistent Homology-induced Graph Ensembles for Time Series Regressions </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+V+T">Viet The Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pham,+D+A">Duy Anh Pham</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Le,+A+T">An Thai Le</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peter,+J">Jans Peter</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gust,+G">Gunther Gust</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> The effectiveness of Spatio-temporal Graph Neural Networks (STGNNs) in time-series applications is often limited by their dependence on fixed, hand-crafted input graph structures. Motivated by insights from the Topological Data Analysis (TDA) paradigm, namely that real-world data exhibits multi-scale patterns, we construct several graphs using \textit{Persistent Homology Filtration} -- a mathematical framework describing the multiscale structural properties of data points. Then, we use the constructed graphs as inputs to create an ensemble of Graph Neural Networks. The ensemble aggregates the signals from the individual learners via an attention-based routing mechanism, thus systematically encoding the inherent multiscale structures of data. Four different real-world experiments on seismic activity prediction and traffic forecasting (PEMS-BAY, METR-LA) demonstrate that our approach consistently outperforms single-graph baselines while providing interpretable insights. 
</p> </div> </dd> <dt> <a name='item59'>[59]</a> <a href ="/abs/2503.14246" title="Abstract" id="2503.14246"> arXiv:2503.14246 </a> [<a href="/pdf/2503.14246" title="Download PDF" id="pdf-2503.14246" aria-labelledby="pdf-2503.14246">pdf</a>, <a href="https://arxiv.org/html/2503.14246v1" title="View HTML" id="html-2503.14246" aria-labelledby="html-2503.14246" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14246" title="Other formats" id="oth-2503.14246" aria-labelledby="oth-2503.14246">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Trading-off Accuracy and Communication Cost in Federated Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Villani,+M+J">Mattia Jacopo Villani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Natale,+E">Emanuele Natale</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mallmann-Trenn,+F">Frederik Mallmann-Trenn</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Leveraging the training-by-pruning paradigm introduced by Zhou et al., Isik et al. introduced a federated learning protocol that achieves a 34-fold reduction in communication cost. We achieve compression improvements of orders of magnitude over the state-of-the-art. The central idea of our framework is to encode the network weights $\vec w$ by a vector of trainable parameters $\vec p$, such that $\vec w = Q\cdot \vec p$, where $Q$ is a carefully-generated sparse random matrix (that remains fixed throughout training). In this framework, the previous work of Zhou et al. [NeurIPS'19] is retrieved when $Q$ is diagonal and $\vec p$ has the same dimension as $\vec w$. We instead show that $\vec p$ can effectively be chosen much smaller than $\vec w$, while retaining the same accuracy at the price of a decrease of the sparsity of $Q$. Since server and clients only need to share $\vec p$, such a trade-off leads to a substantial improvement in communication cost. Moreover, we provide theoretical insight into our framework and establish a novel link between training-by-sampling and random convex geometry. 
</p> </div> </dd> <dt> <a name='item60'>[60]</a> <a href ="/abs/2503.14259" title="Abstract" id="2503.14259"> arXiv:2503.14259 </a> [<a href="/pdf/2503.14259" title="Download PDF" id="pdf-2503.14259" aria-labelledby="pdf-2503.14259">pdf</a>, <a href="https://arxiv.org/html/2503.14259v1" title="View HTML" id="html-2503.14259" aria-labelledby="html-2503.14259" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14259" title="Other formats" id="oth-2503.14259" aria-labelledby="oth-2503.14259">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Quantization-Free Autoregressive Action Transformer </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sheebaelhamd,+Z">Ziyad Sheebaelhamd</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tschannen,+M">Michael Tschannen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Muehlebach,+M">Michael Muehlebach</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vernade,+C">Claire Vernade</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Robotics (cs.RO) </div> <p class='mathjax'> Current transformer-based imitation learning approaches introduce discrete action representations and train an autoregressive transformer decoder on the resulting latent code. However, the initial quantization breaks the continuous structure of the action space thereby limiting the capabilities of the generative model. We propose a quantization-free method instead that leverages Generative Infinite-Vocabulary Transformers (GIVT) as a direct, continuous policy parametrization for autoregressive transformers. This simplifies the imitation learning pipeline while achieving state-of-the-art performance on a variety of popular simulated robotics tasks. We enhance our policy roll-outs by carefully studying sampling algorithms, further improving the results. </p> </div> </dd> <dt> <a name='item61'>[61]</a> <a href ="/abs/2503.14286" title="Abstract" id="2503.14286"> arXiv:2503.14286 </a> [<a href="/pdf/2503.14286" title="Download PDF" id="pdf-2503.14286" aria-labelledby="pdf-2503.14286">pdf</a>, <a href="https://arxiv.org/html/2503.14286v1" title="View HTML" id="html-2503.14286" aria-labelledby="html-2503.14286" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14286" title="Other formats" id="oth-2503.14286" aria-labelledby="oth-2503.14286">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Tapered Off-Policy REINFORCE: Stable and efficient reinforcement learning for LLMs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Roux,+N+L">Nicolas Le Roux</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bellemare,+M+G">Marc G. 
Bellemare</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lebensold,+J">Jonathan Lebensold</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bergeron,+A">Arnaud Bergeron</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Greaves,+J">Joshua Greaves</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fr%C3%A9chette,+A">Alex Fréchette</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pelletier,+C">Carolyne Pelletier</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Toth,+E+T+S">Eric Thibodeau-Laufer Sándor Toth</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Work,+S">Samantha Work</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> We propose a new algorithm for fine-tuning large language models using reinforcement learning. Tapered Off-Policy REINFORCE (TOPR) uses an asymmetric, tapered variant of importance sampling to speed up learning while maintaining stable learning dynamics, even without the use of KL regularization. TOPR can be applied in a fully offline fashion, allows the handling of positive and negative examples in a unified framework, and benefits from the implementational simplicity that is typical of Monte Carlo algorithms. We demonstrate the effectiveness of our approach with a series of experiments on the GSM8K and MATH reasoning benchmarks, finding performance gains for training both a model for solution generation and as a generative verifier. We show that properly leveraging positive and negative examples alike in the off-policy regime simultaneously increases test-time accuracy and training data efficiency, all the while avoiding the ``wasted inference'' that comes with discarding negative examples. We find that this advantage persists over multiple iterations of training and can be amplified by dataset curation techniques, enabling us to match 70B-parameter model performance with 8B language models. As a corollary to this work, we find that REINFORCE's baseline parameter plays an important and unexpected role in defining dataset composition in the presence of negative examples, and is consequently critical in driving off-policy performance. 
</p> </div> </dd> <dt> <a name='item62'>[62]</a> <a href ="/abs/2503.14297" title="Abstract" id="2503.14297"> arXiv:2503.14297 </a> [<a href="/pdf/2503.14297" title="Download PDF" id="pdf-2503.14297" aria-labelledby="pdf-2503.14297">pdf</a>, <a href="https://arxiv.org/html/2503.14297v1" title="View HTML" id="html-2503.14297" aria-labelledby="html-2503.14297" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14297" title="Other formats" id="oth-2503.14297" aria-labelledby="oth-2503.14297">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Improved Scalable Lipschitz Bounds for Deep Neural Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Syed,+U">Usman Syed</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+B">Bin Hu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Systems and Control (eess.SY); Optimization and Control (math.OC); Machine Learning (stat.ML) </div> <p class='mathjax'> Computing tight Lipschitz bounds for deep neural networks is crucial for analyzing their robustness and stability, but existing approaches either produce relatively conservative estimates or rely on semidefinite programming (SDP) formulations (namely the LipSDP condition) that face scalability issues. Building upon ECLipsE-Fast, the state-of-the-art Lipschitz bound method that avoids SDP formulations, we derive a new family of improved scalable Lipschitz bounds that can be combined to outperform ECLipsE-Fast. Specifically, we leverage more general parameterizations of feasible points of LipSDP to derive various closed-form Lipschitz bounds, avoiding the use of SDP solvers. In addition, we show that our technique encompasses ECLipsE-Fast as a special case and leads to a much larger class of scalable Lipschitz bounds for deep neural networks. Our empirical study shows that our bounds improve ECLipsE-Fast, further advancing the scalability and precision of Lipschitz estimation for large neural networks. 
</p> </div> </dd> <dt> <a name='item63'>[63]</a> <a href ="/abs/2503.14299" title="Abstract" id="2503.14299"> arXiv:2503.14299 </a> [<a href="/pdf/2503.14299" title="Download PDF" id="pdf-2503.14299" aria-labelledby="pdf-2503.14299">pdf</a>, <a href="https://arxiv.org/html/2503.14299v1" title="View HTML" id="html-2503.14299" aria-labelledby="html-2503.14299" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14299" title="Other formats" id="oth-2503.14299" aria-labelledby="oth-2503.14299">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unveiling the Role of Randomization in Multiclass Adversarial Classification: Insights from Graph Theory </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Gnecco-Heredia,+L">Lucas Gnecco-Heredia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sammut,+M">Matteo Sammut</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pydi,+M+S">Muni Sreenivas Pydi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pinot,+R">Rafael Pinot</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Negrevergne,+B">Benjamin Negrevergne</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chevaleyre,+Y">Yann Chevaleyre</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages (main), 30 in total. Camera-ready version, accepted at AISTATS 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Randomization as a means to improve the adversarial robustness of machine learning models has recently attracted significant attention. Unfortunately, much of the theoretical analysis so far has focused on binary classification, providing only limited insights into the more complex multiclass setting. In this paper, we take a step toward closing this gap by drawing inspiration from the field of graph theory. Our analysis focuses on discrete data distributions, allowing us to cast the adversarial risk minimization problems within the well-established framework of set packing problems. By doing so, we are able to identify three structural conditions on the support of the data distribution that are necessary for randomization to improve robustness. Furthermore, we are able to construct several data distributions where (contrary to binary classification) switching from a deterministic to a randomized solution significantly reduces the optimal adversarial risk. These findings highlight the crucial role randomization can play in enhancing robustness to adversarial attacks in multiclass classification. 
</p> </div> </dd> <dt> <a name='item64'>[64]</a> <a href ="/abs/2503.14301" title="Abstract" id="2503.14301"> arXiv:2503.14301 </a> [<a href="/pdf/2503.14301" title="Download PDF" id="pdf-2503.14301" aria-labelledby="pdf-2503.14301">pdf</a>, <a href="https://arxiv.org/html/2503.14301v1" title="View HTML" id="html-2503.14301" aria-labelledby="html-2503.14301" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14301" title="Other formats" id="oth-2503.14301" aria-labelledby="oth-2503.14301">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FeNeC: Enhancing Continual Learning via Feature Clustering with Neighbor- or Logit-Based Classification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ksi%C4%85%C5%BCek,+K">Kamil Książek</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jastrz%C4%99bski,+H">Hubert Jastrzębski</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Trojan,+B">Bartosz Trojan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pniaczek,+K">Krzysztof Pniaczek</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Karp,+M">Michał Karp</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tabor,+J">Jacek Tabor</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> The ability of deep learning models to learn continuously is essential for adapting to new data categories and evolving data distributions. In recent years, approaches leveraging frozen feature extractors after an initial learning phase have been extensively studied. Many of these methods estimate per-class covariance matrices and prototypes based on backbone-derived feature representations. Within this paradigm, we introduce FeNeC (Feature Neighborhood Classifier) and FeNeC-Log, its variant based on the log-likelihood function. Our approach generalizes the existing concept by incorporating data clustering to capture greater intra-class variability. Utilizing the Mahalanobis distance, our models classify samples either through a nearest neighbor approach or trainable logit values assigned to consecutive classes. Our proposal reduces to the existing approaches in a special case, while extending them with the ability to adapt more flexibly to the data. We demonstrate that the two FeNeC variants achieve competitive performance in scenarios where task identities are unknown and establish state-of-the-art results on several benchmarks. 
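<br><br>A rough sketch of the general recipe (per-class clustering of backbone features, then Mahalanobis nearest-centroid classification); the covariance estimation details, hyperparameters, and the trainable-logit variant in FeNeC-Log follow the paper rather than this illustration: <pre>
import numpy as np
from sklearn.cluster import KMeans

def fit_class_centroids(X, y, n_clusters=3):
    """Cluster each class's backbone features and estimate a shared
    inverse covariance for Mahalanobis distances."""
    centroids = {c: KMeans(n_clusters=n_clusters, n_init=10,
                           random_state=0).fit(X[y == c]).cluster_centers_
                 for c in np.unique(y)}
    inv_cov = np.linalg.pinv(np.cov(X, rowvar=False))
    return centroids, inv_cov

def predict(x, centroids, inv_cov):
    """Assign x to the class owning the nearest cluster centroid,
    measured with the Mahalanobis distance."""
    def d2(m):
        diff = x - m
        return float(diff @ inv_cov @ diff)
    return min(centroids, key=lambda c: min(d2(m) for m in centroids[c]))
</pre>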
</p> </div> </dd> <dt> <a name='item65'>[65]</a> <a href ="/abs/2503.14321" title="Abstract" id="2503.14321"> arXiv:2503.14321 </a> [<a href="/pdf/2503.14321" title="Download PDF" id="pdf-2503.14321" aria-labelledby="pdf-2503.14321">pdf</a>, <a href="/format/2503.14321" title="Other formats" id="oth-2503.14321" aria-labelledby="oth-2503.14321">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> COPA: Comparing the Incomparable to Explore the Pareto Front </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Javaloy,+A">Adrián Javaloy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vergari,+A">Antonio Vergari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Valera,+I">Isabel Valera</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 19 pages, 14 figures. Under submission </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> In machine learning (ML), it is common to account for multiple objectives when, e.g., selecting a model to deploy. However, it is often unclear how one should compare, aggregate and, ultimately, trade off these objectives, as they might be measured in different units or scales. For example, when deploying large language models (LLMs), we might not only care about their performance, but also their CO2 consumption. In this work, we investigate how objectives can be sensibly compared and aggregated to navigate their Pareto front. To do so, we propose to make incomparable objectives comparable via their CDFs, approximated by their relative rankings. This allows us to aggregate them while matching user-specific preferences, allowing practitioners to meaningfully navigate and search for models in the Pareto front. We demonstrate the potential impact of our methodology in diverse areas such as LLM selection, domain generalization, and AutoML benchmarking, where classical ways to aggregate and normalize objectives fail. </p> </div> </dd> <dt> <a name='item66'>[66]</a> <a href ="/abs/2503.14333" title="Abstract" id="2503.14333"> arXiv:2503.14333 </a> [<a href="/pdf/2503.14333" title="Download PDF" id="pdf-2503.14333" aria-labelledby="pdf-2503.14333">pdf</a>, <a href="https://arxiv.org/html/2503.14333v1" title="View HTML" id="html-2503.14333" aria-labelledby="html-2503.14333" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14333" title="Other formats" id="oth-2503.14333" aria-labelledby="oth-2503.14333">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Revealing higher-order neural representations with generative artificial intelligence </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Asrari,+H+A">Hojjat Azimi Asrari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peters,+M+A+K">Megan A. K. Peters</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Neurons and Cognition (q-bio.NC) </div> <p class='mathjax'> Studies often aim to reveal how neural representations encode aspects of an observer's environment, such as its contents or structure. 
These are ``first-order" representations (FORs), because they're ``about" the external world. A less-common target is ``higher-order" representations (HORs), which are ``about" FORs -- their contents, stability, or uncertainty. HORs of uncertainty appear critically involved in adaptive behaviors including learning under uncertainty, influencing learning rates and internal model updating based on environmental feedback. However, HORs about uncertainty are unlikely to be direct ``read-outs" of FOR characteristics, instead reflecting estimation processes which may be lossy, bias-prone, or distortive and which may also incorporate estimates of distributions of uncertainty the observer is likely to experience. While some research has targeted neural representations of ``instantaneously" estimated uncertainty, how the brain represents \textit{distributions} of expected uncertainty remains largely unexplored. Here, we propose a novel reinforcement learning (RL) based generative artificial intelligence (genAI) approach to explore neural representations of uncertainty distributions. We use existing functional magnetic resonance imaging data, where humans learned to `de-noise' their brain states to achieve target neural patterns, to train denoising diffusion genAI models with RL algorithms to learn noise distributions similar to how humans might learn to do the same. We then explore these models' learned noise-distribution HORs compared to control models trained with traditional backpropagation. Results reveal model-dependent differences in noise distribution representations -- with the RL-based model offering much higher explanatory power for human behavior -- offering an exciting path towards using genAI to explore neural noise-distribution HORs. </p> </div> </dd> <dt> <a name='item67'>[67]</a> <a href ="/abs/2503.14337" title="Abstract" id="2503.14337"> arXiv:2503.14337 </a> [<a href="/pdf/2503.14337" title="Download PDF" id="pdf-2503.14337" aria-labelledby="pdf-2503.14337">pdf</a>, <a href="https://arxiv.org/html/2503.14337v1" title="View HTML" id="html-2503.14337" aria-labelledby="html-2503.14337" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14337" title="Other formats" id="oth-2503.14337" aria-labelledby="oth-2503.14337">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PENCIL: Long Thoughts with Short Memory </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+C">Chenxiao Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nathan">Nathan Srebro</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=McAllester,+D">David McAllester</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Z">Zhiyuan Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> While recent works (e.g. o1, DeepSeek R1) have demonstrated great promise of using long Chain-of-Thought (CoT) to improve reasoning capabilities of language models, scaling it up during test-time is challenging due to inefficient memory usage -- intermediate computations accumulate indefinitely in context even no longer needed for future thoughts. 
We propose PENCIL, which incorporates a reduction mechanism into the autoregressive generation process, allowing the model to recursively clean up intermediate thoughts based on patterns learned from training. With this reduction mechanism, PENCIL significantly reduces the maximal context length required during generation, and thus can generate longer thoughts with limited memory, solving larger-scale problems given more thinking time. For example, we demonstrate PENCIL achieves 97\% accuracy on the challenging Einstein's puzzle -- a task even large models like GPT-4 struggle with -- using only a small 25M-parameter transformer with 2048 context length. Theoretically, we prove PENCIL can perform universal space-efficient computation by simulating Turing machines with optimal time and space complexity, and thus can solve arbitrary computational tasks that would otherwise be intractable given context window constraints. </p> </div> </dd> <dt> <a name='item68'>[68]</a> <a href ="/abs/2503.14338" title="Abstract" id="2503.14338"> arXiv:2503.14338 </a> [<a href="/pdf/2503.14338" title="Download PDF" id="pdf-2503.14338" aria-labelledby="pdf-2503.14338">pdf</a>, <a href="https://arxiv.org/html/2503.14338v1" title="View HTML" id="html-2503.14338" aria-labelledby="html-2503.14338" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14338" title="Other formats" id="oth-2503.14338" aria-labelledby="oth-2503.14338">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Higher-Order Graphon Neural Networks: Approximation and Cut Distance </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Herbst,+D">Daniel Herbst</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jegelka,+S">Stefanie Jegelka</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 51 pages, 6 figures, ICLR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Graph limit models, like graphons for limits of dense graphs, have recently been used to study size transferability of graph neural networks (GNNs). While most literature focuses on message passing GNNs (MPNNs), in this work we attend to the more powerful higher-order GNNs. First, we extend the $k$-WL test for graphons (Böker, 2023) to the graphon-signal space and introduce signal-weighted homomorphism densities as a key tool. As an exemplary focus, we generalize Invariant Graph Networks (IGNs) to graphons, proposing Invariant Graphon Networks (IWNs) defined via a subset of the IGN basis corresponding to bounded linear operators. Even with this restricted basis, we show that IWNs of order $k$ are at least as powerful as the $k$-WL test, and we establish universal approximation results for graphon-signals in $L^p$ distances. This significantly extends the prior work of Cai & Wang (2022), showing that IWNs--a subset of their IGN-small--retain effectively the same expressivity as the full IGN basis in the limit. In contrast to their approach, our blueprint of IWNs also aligns better with the geometry of graphon space, for example facilitating comparability to MPNNs. We highlight that, while typical higher-order GNNs are discontinuous w.r.t. cut distance--which causes their lack of convergence and is inherently tied to the definition of $k$-WL--their transferability remains comparable to MPNNs. 
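<br><br>For readers newer to graph limits, the cut norm and cut distance underlying this convergence discussion are the standard ones from graphon theory (Lovász): $$\|W\|_\square = \sup_{S,T \subseteq [0,1]} \left| \int_{S \times T} W(x,y)\,dx\,dy \right|, \qquad \delta_\square(W,U) = \inf_{\varphi} \|W - U^{\varphi}\|_\square,$$ where the infimum ranges over measure-preserving bijections $\varphi$ of $[0,1]$.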
</p> </div> </dd> <dt> <a name='item69'>[69]</a> <a href ="/abs/2503.14342" title="Abstract" id="2503.14342"> arXiv:2503.14342 </a> [<a href="/pdf/2503.14342" title="Download PDF" id="pdf-2503.14342" aria-labelledby="pdf-2503.14342">pdf</a>, <a href="https://arxiv.org/html/2503.14342v1" title="View HTML" id="html-2503.14342" aria-labelledby="html-2503.14342" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14342" title="Other formats" id="oth-2503.14342" aria-labelledby="oth-2503.14342">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> End-to-End Optimal Detector Design with Mutual Information Surrogates </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wozniak,+K+A">Kinga Anna Wozniak</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mulligan,+S">Stephen Mulligan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kieseler,+J">Jan Kieseler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Klute,+M">Markus Klute</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fleuret,+F">Francois Fleuret</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Golling,+T">Tobias Golling</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; High Energy Physics - Phenomenology (hep-ph) </div> <p class='mathjax'> We introduce a novel approach for end-to-end black-box optimization of high energy physics (HEP) detectors using local deep learning (DL) surrogates. These surrogates approximate a scalar objective function that encapsulates the complex interplay of particle-matter interactions and physics analysis goals. In addition to a standard reconstruction-based metric commonly used in the field, we investigate the information-theoretic metric of mutual information. Unlike traditional methods, mutual information is inherently task-agnostic, offering a broader optimization paradigm that is less constrained by predefined targets. We demonstrate the effectiveness of our method in a realistic physics analysis scenario: optimizing the thicknesses of calorimeter detector layers based on simulated particle interactions. The surrogate model learns to approximate objective gradients, enabling efficient optimization with respect to energy resolution. Our findings reveal three key insights: (1) end-to-end black-box optimization using local surrogates is a practical and compelling approach for detector design, providing direct optimization of detector parameters in alignment with physics analysis goals; (2) mutual information-based optimization yields design choices that closely match those from state-of-the-art physics-informed methods, indicating that these approaches operate near optimality and reinforcing their reliability in HEP detector design; and (3) information-theoretic methods provide a powerful, generalizable framework for optimizing scientific instruments. By reframing the optimization process through an information-theoretic lens rather than domain-specific heuristics, mutual information enables the exploration of new avenues for discovery beyond conventional approaches. 
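<br><br>For reference, the task-agnostic objective investigated here is the standard mutual information between, say, the detector readout $X$ and the quantity of interest $Y$, $$I(X;Y) = \int p(x,y)\,\log\frac{p(x,y)}{p(x)\,p(y)}\,dx\,dy,$$ which in this setting must be estimated from simulated samples through the learned surrogate.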
</p> </div> </dd> <dt> <a name='item70'>[70]</a> <a href ="/abs/2503.14356" title="Abstract" id="2503.14356"> arXiv:2503.14356 </a> [<a href="/pdf/2503.14356" title="Download PDF" id="pdf-2503.14356" aria-labelledby="pdf-2503.14356">pdf</a>, <a href="https://arxiv.org/html/2503.14356v1" title="View HTML" id="html-2503.14356" aria-labelledby="html-2503.14356" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14356" title="Other formats" id="oth-2503.14356" aria-labelledby="oth-2503.14356">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Benchmarking community drug response prediction models: datasets, models, tools, and metrics for cross-dataset generalization analysis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Partin,+A">Alexander Partin</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Vasanthakumari,+P">Priyanka Vasanthakumari</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Narykov,+O">Oleksandr Narykov</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Wilke,+A">Andreas Wilke</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Koussa,+N">Natasha Koussa</a> (2), <a href="https://arxiv.org/search/cs?searchtype=author&query=Jones,+S+E">Sara E. Jones</a> (2), <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+Y">Yitan Zhu</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Overbeek,+J+C">Jamie C. Overbeek</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Jain,+R">Rajeev Jain</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Fernando,+G+D">Gayara Demini Fernando</a> (3), <a href="https://arxiv.org/search/cs?searchtype=author&query=Sanchez-Villalobos,+C">Cesar Sanchez-Villalobos</a> (4), <a href="https://arxiv.org/search/cs?searchtype=author&query=Garcia-Cardona,+C">Cristina Garcia-Cardona</a> (5), <a href="https://arxiv.org/search/cs?searchtype=author&query=Mohd-Yusof,+J">Jamaludin Mohd-Yusof</a> (5), <a href="https://arxiv.org/search/cs?searchtype=author&query=Chia,+N">Nicholas Chia</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Wozniak,+J+M">Justin M. Wozniak</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Ghosh,+S">Souparno Ghosh</a> (3), <a href="https://arxiv.org/search/cs?searchtype=author&query=Pal,+R">Ranadip Pal</a> (4), <a href="https://arxiv.org/search/cs?searchtype=author&query=Brettin,+T+S">Thomas S. Brettin</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Weil,+M+R">M. Ryan Weil</a> (2), <a href="https://arxiv.org/search/cs?searchtype=author&query=Stevens,+R+L">Rick L. 
Stevens</a> (1 and 6) ((1) Division of Data Science and Learning, Argonne National Laboratory, Lemont, IL, USA, (2) Frederick National Laboratory for Cancer Research, Cancer Data Science Initiatives, Cancer Research Technology Program, Frederick, MD, USA, (3) Department of Statistics, University of Nebraska-Lincoln, Lincoln, NE, USA, (4) Department of Electrical and Computer Engineering, Texas Tech University, Lubbock, TX, USA, (5) Division of Computer, Computational and Statistical Sciences, Los Alamos National Laboratory, Los Alamos, NM, USA, (6) Department of Computer Science, The University of Chicago, Chicago, IL, USA)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages, 9 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Quantitative Methods (q-bio.QM) </div> <p class='mathjax'> Deep learning (DL) and machine learning (ML) models have shown promise in drug response prediction (DRP), yet their ability to generalize across datasets remains an open question, raising concerns about their real-world applicability. Due to the lack of standardized benchmarking approaches, model evaluations and comparisons often rely on inconsistent datasets and evaluation criteria, making it difficult to assess true predictive capabilities. In this work, we introduce a benchmarking framework for evaluating cross-dataset prediction generalization in DRP models. Our framework incorporates five publicly available drug screening datasets, six standardized DRP models, and a scalable workflow for systematic evaluation. To assess model generalization, we introduce a set of evaluation metrics that quantify both absolute performance (e.g., predictive accuracy across datasets) and relative performance (e.g., performance drop compared to within-dataset results), enabling a more comprehensive assessment of model transferability. Our results reveal substantial performance drops when models are tested on unseen datasets, underscoring the importance of rigorous generalization assessments. While several models demonstrate relatively strong cross-dataset generalization, no single model consistently outperforms across all datasets. Furthermore, we identify CTRPv2 as the most effective source dataset for training, yielding higher generalization scores across target datasets. By sharing this standardized evaluation framework with the community, our study aims to establish a rigorous foundation for model comparison, and accelerate the development of robust DRP models for real-world applications. 
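<br><br>A minimal sketch of the two families of metrics described, absolute cross-dataset scores and the relative drop versus within-dataset results (the dataset names and numbers below are placeholders, not results from the paper): <pre>
import numpy as np

def generalization_table(scores):
    """scores[src][tgt]: test score of a model trained on `src` and
    evaluated on `tgt`. Returns, per (src, tgt) pair, the absolute
    cross-dataset score and the relative drop vs. within-dataset."""
    out = {}
    for src, row in scores.items():
        within = row[src]
        for tgt, s in row.items():
            if tgt != src:
                out[(src, tgt)] = {"absolute": s,
                                   "relative_drop": (within - s) / within}
    return out

# Placeholder numbers for illustration only.
scores = {"A": {"A": 0.80, "B": 0.55},
          "B": {"B": 0.78, "A": 0.50}}
print(generalization_table(scores))
</pre>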
</p> </div> </dd> <dt> <a name='item71'>[71]</a> <a href ="/abs/2503.14357" title="Abstract" id="2503.14357"> arXiv:2503.14357 </a> [<a href="/pdf/2503.14357" title="Download PDF" id="pdf-2503.14357" aria-labelledby="pdf-2503.14357">pdf</a>, <a href="https://arxiv.org/html/2503.14357v1" title="View HTML" id="html-2503.14357" aria-labelledby="html-2503.14357" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14357" title="Other formats" id="oth-2503.14357" aria-labelledby="oth-2503.14357">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Wasserstein-based Kernels for Clustering: Application to Power Distribution Graphs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Oneto,+A">Alfredo Oneto</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gjorgiev,+B">Blazhe Gjorgiev</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sansavini,+G">Giovanni Sansavini</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Applications (stat.AP) </div> <p class='mathjax'> Many data clustering applications must handle objects that cannot be represented as vector data. In this context, the bag-of-vectors representation can be leveraged to describe complex objects through discrete distributions, and the Wasserstein distance can effectively measure the dissimilarity between them. Additionally, kernel methods can be used to embed data into feature spaces that are easier to analyze. Despite significant progress in data clustering, a method that simultaneously accounts for distributional and vectorial dissimilarity measures is still lacking. To tackle this gap, this work explores kernel methods and Wasserstein distance metrics to develop a computationally tractable clustering framework. The compositional properties of kernels allow the simultaneous handling of different metrics, enabling the integration of both vectors and discrete distributions for object representation. This approach is flexible enough to be applied in various domains, such as graph analysis and image processing. The framework consists of three main components. First, we efficiently approximate pairwise Wasserstein distances using multiple reference distributions. Second, we employ kernel functions based on Wasserstein distances and present ways of composing kernels to express different types of information. Finally, we use the kernels to cluster data and evaluate the quality of the results using scalable and distance-agnostic validity indices. A case study involving two datasets of 879 and 34,920 power distribution graphs demonstrates the framework's effectiveness and efficiency. 
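<br><br>A toy version of the pipeline for one-dimensional empirical distributions (the paper's multi-reference approximation and kernel compositions are richer, and note that Gaussian kernels built on Wasserstein distances are not guaranteed to be positive semidefinite in general): <pre>
import numpy as np
from scipy.stats import wasserstein_distance
from sklearn.cluster import SpectralClustering

def wasserstein_kernel_clustering(samples, n_clusters=2, sigma=1.0):
    """samples: list of 1-D arrays, each an empirical distribution.
    Builds a Gaussian kernel on pairwise W1 distances, then clusters
    with a precomputed-affinity spectral method."""
    n = len(samples)
    D = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            D[i, j] = D[j, i] = wasserstein_distance(samples[i], samples[j])
    K = np.exp(-(D ** 2) / (2 * sigma ** 2))
    return SpectralClustering(n_clusters=n_clusters, affinity="precomputed",
                              random_state=0).fit_predict(K)
</pre>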
</p> </div> </dd> <dt> <a name='item72'>[72]</a> <a href ="/abs/2503.14376" title="Abstract" id="2503.14376"> arXiv:2503.14376 </a> [<a href="/pdf/2503.14376" title="Download PDF" id="pdf-2503.14376" aria-labelledby="pdf-2503.14376">pdf</a>, <a href="https://arxiv.org/html/2503.14376v1" title="View HTML" id="html-2503.14376" aria-labelledby="html-2503.14376" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14376" title="Other formats" id="oth-2503.14376" aria-labelledby="oth-2503.14376">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Tiled Flash Linear Attention: More Efficient Linear RNN and xLSTM Kernels </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Beck,+M">Maximilian Beck</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=P%C3%B6ppel,+K">Korbinian Pöppel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lippe,+P">Phillip Lippe</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hochreiter,+S">Sepp Hochreiter</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Code available at: <a href="https://github.com/NX-AI/mlstm_kernels" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Linear RNNs with gating recently demonstrated competitive performance compared to Transformers in language modeling. Although their linear compute scaling in sequence length offers theoretical runtime advantages over Transformers, realizing these benefits in practice requires optimized custom kernels, as Transformers rely on the highly efficient Flash Attention kernels. Leveraging the chunkwise-parallel formulation of linear RNNs, Flash Linear Attention (FLA) shows that linear RNN kernels are faster than Flash Attention, by parallelizing over chunks of the input sequence. However, since the chunk size of FLA is limited, many intermediate states must be materialized in GPU memory. This leads to low arithmetic intensity and causes high memory consumption and IO cost, especially for long-context pre-training. In this work, we present Tiled Flash Linear Attention (TFLA), a novel kernel algorithm for linear RNNs that enables arbitrarily large chunk sizes by introducing an additional level of sequence parallelization within each chunk. First, we apply TFLA to the xLSTM with matrix memory, the mLSTM. Second, we propose an mLSTM variant with sigmoid input gate and reduced computation for even faster kernel runtimes at equal language modeling performance. In our speed benchmarks, we show that our new mLSTM kernels based on TFLA outperform highly optimized Flash Attention, Linear Attention and Mamba kernels, setting a new state of the art for efficient long-context sequence modeling primitives. 
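<br><br>To make the chunkwise-parallel formulation concrete, here is a plain NumPy version of ungated causal linear attention computed chunk by chunk; TFLA's intra-chunk tiling and the mLSTM gates are deliberately omitted: <pre>
import numpy as np

def chunkwise_linear_attention(Q, K, V, chunk=64):
    """Causal linear attention O[t] = q_t @ sum_{s &lt;= t} k_s v_s^T,
    computed chunk by chunk: a recurrent inter-chunk state plus a
    causally masked intra-chunk product."""
    T, d = Q.shape
    S = np.zeros((d, V.shape[1]))             # running state sum_s k_s v_s^T
    out = np.empty_like(V)
    for start in range(0, T, chunk):
        q, k, v = (M[start:start + chunk] for M in (Q, K, V))
        mask = np.tril(np.ones((len(q), len(q))))   # causal within chunk
        out[start:start + chunk] = q @ S + ((q @ k.T) * mask) @ v
        S += k.T @ v                            # carry state to next chunk
    return out
</pre>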
</p> </div> </dd> <dt> <a name='item73'>[73]</a> <a href ="/abs/2503.14393" title="Abstract" id="2503.14393"> arXiv:2503.14393 </a> [<a href="/pdf/2503.14393" title="Download PDF" id="pdf-2503.14393" aria-labelledby="pdf-2503.14393">pdf</a>, <a href="https://arxiv.org/html/2503.14393v1" title="View HTML" id="html-2503.14393" aria-labelledby="html-2503.14393" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14393" title="Other formats" id="oth-2503.14393" aria-labelledby="oth-2503.14393">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On the clustering behavior of sliding windows </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Alexeev,+B">Boris Alexeev</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+W">Wenyan Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mixon,+D+G">Dustin G. Mixon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y+X">Yan X Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Things can go spectacularly wrong when clustering timeseries data that has been preprocessed with a sliding window. We highlight three surprising failures that emerge depending on how the window size compares with the timeseries length. In addition to computational examples, we present theoretical explanations for each of these failure modes. </p> </div> </dd> <dt> <a name='item74'>[74]</a> <a href ="/abs/2503.14396" title="Abstract" id="2503.14396"> arXiv:2503.14396 </a> [<a href="/pdf/2503.14396" title="Download PDF" id="pdf-2503.14396" aria-labelledby="pdf-2503.14396">pdf</a>, <a href="https://arxiv.org/html/2503.14396v1" title="View HTML" id="html-2503.14396" aria-labelledby="html-2503.14396" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14396" title="Other formats" id="oth-2503.14396" aria-labelledby="oth-2503.14396">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Technical Report: Aggregation on Learnable Manifolds for Asynchronous Federated Optimization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Licudi,+A">Archie Licudi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 22 pages, 3 figures. Preliminary technical project report </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> In Federated Learning (FL), a primary challenge to the server-side aggregation of client models is device heterogeneity, in both loss landscape geometry and computational capacity. This issue can be particularly pronounced in clinical contexts where variations in data distribution (aggravated by class imbalance), infrastructure requirements, and sample sizes are common. We propose AsyncManifold, a novel asynchronous FL framework to address these issues by taking advantage of underlying solution space geometry, at each of the local training, delay-correction, and aggregation stages. 
Our proposal is accompanied by a convergence proof in a general form and, motivated by thorough exploratory studies of local behaviour, a proof-of-concept algorithm which performs aggregation along non-linear mode connections and hence avoids barriers to convergence that techniques based on linear interpolation will encounter. </p> </div> </dd> <dt> <a name='item75'>[75]</a> <a href ="/abs/2503.14403" title="Abstract" id="2503.14403"> arXiv:2503.14403 </a> [<a href="/pdf/2503.14403" title="Download PDF" id="pdf-2503.14403" aria-labelledby="pdf-2503.14403">pdf</a>, <a href="https://arxiv.org/html/2503.14403v1" title="View HTML" id="html-2503.14403" aria-labelledby="html-2503.14403" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14403" title="Other formats" id="oth-2503.14403" aria-labelledby="oth-2503.14403">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Landscape Complexity for the Empirical Risk of Generalized Linear Models: Discrimination between Structured Data </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tsironis,+T+G">Theodoros G. Tsironis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Moustakas,+A+L">Aris L. Moustakas</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Statistical Mechanics (cond-mat.stat-mech); Machine Learning (stat.ML) </div> <p class='mathjax'> We use the Kac-Rice formula and results from random matrix theory to obtain the average number of critical points of a family of high-dimensional empirical loss functions, where the data are correlated $d$-dimensional Gaussian vectors, whose number has a fixed ratio with their dimension. The correlations are introduced to model the existence of structure in the data, as is common in current Machine-Learning systems. Under a technical hypothesis, our results are exact in the large-$d$ limit, and characterize the annealed landscape complexity, namely the logarithm of the expected number of critical points at a given value of the loss. <br>We first address in detail the landscape of the loss function of a single perceptron and then generalize it to the case where two competing data sets with different covariance matrices are present, with the perceptron seeking to discriminate between them. The latter model can be applied to understand the interplay between adversity and non-trivial data structure. For completeness, we also treat the case of a loss function used in training Generalized Linear Models in the presence of correlated input data. 
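<br><br>The Kac-Rice formula referred to here is the standard expression for the expected number of critical points of a smooth random field $L$ on $\mathbb{R}^d$, $$\mathbb{E}\big[\#\{x : \nabla L(x) = 0\}\big] = \int_{\mathbb{R}^d} \mathbb{E}\big[\,|\det \nabla^2 L(x)|\;\big|\;\nabla L(x)=0\,\big]\; p_{\nabla L(x)}(0)\, dx,$$ and the annealed complexity is the normalized logarithm $\frac{1}{d}\log \mathbb{E}[\#\,\mathrm{crit}]$ in the large-$d$ limit.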
</p> </div> </dd> <dt> <a name='item76'>[76]</a> <a href ="/abs/2503.14434" title="Abstract" id="2503.14434"> arXiv:2503.14434 </a> [<a href="/pdf/2503.14434" title="Download PDF" id="pdf-2503.14434" aria-labelledby="pdf-2503.14434">pdf</a>, <a href="https://arxiv.org/html/2503.14434v1" title="View HTML" id="html-2503.14434" aria-labelledby="html-2503.14434" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14434" title="Other formats" id="oth-2503.14434" aria-labelledby="oth-2503.14434">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> LLM-FE: Automated Feature Engineering for Tabular Data with LLMs as Evolutionary Optimizers </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Abhyankar,+N">Nikhil Abhyankar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shojaee,+P">Parshin Shojaee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Reddy,+C+K">Chandan K. Reddy</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Neural and Evolutionary Computing (cs.NE) </div> <p class='mathjax'> Automated feature engineering plays a critical role in improving predictive model performance for tabular learning tasks. Traditional automated feature engineering methods are limited by their reliance on pre-defined transformations within fixed, manually designed search spaces, often neglecting domain knowledge. Recent advances using Large Language Models (LLMs) have enabled the integration of domain knowledge into the feature engineering process. However, existing LLM-based approaches use direct prompting or rely solely on validation scores for feature selection, failing to leverage insights from prior feature discovery experiments or establish meaningful reasoning between feature generation and data-driven performance. To address these challenges, we propose LLM-FE, a novel framework that combines evolutionary search with the domain knowledge and reasoning capabilities of LLMs to automatically discover effective features for tabular learning tasks. LLM-FE formulates feature engineering as a program search problem, where LLMs propose new feature transformation programs iteratively, and data-driven feedback guides the search process. Our results demonstrate that LLM-FE consistently outperforms state-of-the-art baselines, significantly enhancing the performance of tabular prediction models across diverse classification and regression benchmarks. 
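<br><br>A schematic of the evolutionary search loop described above; <code>llm_propose_programs</code> and <code>evaluate</code> are hypothetical placeholders standing in for the paper's LLM prompting and validation scoring, so this is only the skeleton of the idea: <pre>
import heapq

def evolve_features(X, y, llm_propose_programs, evaluate, rounds=10, pop=8):
    """Iterative LLM-driven feature search: keep a population of
    feature-transformation programs, ask the LLM for variations of the
    current best ones, and retain programs by validation score."""
    population = [("identity", evaluate(X, y, "identity"))]
    for _ in range(rounds):
        parents = heapq.nlargest(pop, population, key=lambda p: p[1])
        for prog in llm_propose_programs([p[0] for p in parents]):
            population.append((prog, evaluate(X, y, prog)))
    return max(population, key=lambda p: p[1])[0]
</pre>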
</p> </div> </dd> <dt> <a name='item77'>[77]</a> <a href ="/abs/2503.14439" title="Abstract" id="2503.14439"> arXiv:2503.14439 </a> [<a href="/pdf/2503.14439" title="Download PDF" id="pdf-2503.14439" aria-labelledby="pdf-2503.14439">pdf</a>, <a href="https://arxiv.org/html/2503.14439v1" title="View HTML" id="html-2503.14439" aria-labelledby="html-2503.14439" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14439" title="Other formats" id="oth-2503.14439" aria-labelledby="oth-2503.14439">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Graph-CNNs for RF Imaging: Learning the Electric Field Integral Equations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Stylianopoulos,+K">Kyriakos Stylianopoulos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gavriilidis,+P">Panagiotis Gavriilidis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gradoni,+G">Gabriele Gradoni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Alexandropoulos,+G+C">George C. Alexandropoulos</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to EUSIPCO 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Signal Processing (eess.SP) </div> <p class='mathjax'> Radio-Frequency (RF) imaging concerns the digital recreation of the surfaces of scene objects based on the scattered field at distributed receivers. To solve this difficult inverse scattering problem, data-driven methods are often employed that extract patterns from similar training examples, while offering minimal latency. In this paper, we first provide an approximate yet fast electromagnetic model, which is based on the electric field integral equations, for data generation, and subsequently propose a Deep Neural Network (DNN) architecture to learn the corresponding inverse model. A graph-attention backbone allows for the system geometry to be passed to the DNN, where residual convolutional layers extract features about the objects, while a UNet head performs the final image reconstruction. Our quantitative and qualitative evaluations on two synthetic data sets of different characteristics showcase the performance gains of the proposed advanced architecture and its relative resilience to signal noise levels and various reception configurations. 
</p> </div> </dd> <dt> <a name='item78'>[78]</a> <a href ="/abs/2503.14442" title="Abstract" id="2503.14442"> arXiv:2503.14442 </a> [<a href="/pdf/2503.14442" title="Download PDF" id="pdf-2503.14442" aria-labelledby="pdf-2503.14442">pdf</a>, <a href="/format/2503.14442" title="Other formats" id="oth-2503.14442" aria-labelledby="oth-2503.14442">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Inducing Causal Structure for Interpretable Neural Networks Applied to Glucose Prediction for T1DM Patients </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Esponera,+A">Ana Esponera</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Cin%C3%A0,+G">Giovanni Cinà</a> (1 and 2) ((1) Medical Informatics Department from Amsterdam University Medical Center The Netherlands (2) Institute for Logic Language and Computation from University of Amsterdam The Netherlands)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 27 pages, 10 pages, to be published in the Proceedings of Machine Learning Research (PMLR), to be presented at the conference CLeaR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Biomolecules (q-bio.BM) </div> <p class='mathjax'> Causal abstraction techniques such as Interchange Intervention Training (IIT) have been proposed to infuse neural networks with expert knowledge encoded in causal models, but their application to real-world problems remains limited. This article explores the application of IIT in predicting blood glucose levels in Type 1 Diabetes Mellitus (T1DM) patients. The study utilizes an acyclic version of the simglucose simulator approved by the FDA to train a Multi-Layer Perceptron (MLP) model, employing IIT to impose causal relationships. Results show that the model trained with IIT effectively abstracted the causal structure and outperformed the standard one in terms of predictive performance across different prediction horizons (PHs) post-meal. Furthermore, the breakdown of the counterfactual loss can be leveraged to explain which parts of the causal mechanism are more or less effectively captured by the model. These preliminary results suggest the potential of IIT in enhancing predictive models in healthcare by effectively complying with expert knowledge. 
</p> </div> </dd> <dt> <a name='item79'>[79]</a> <a href ="/abs/2503.14443" title="Abstract" id="2503.14443"> arXiv:2503.14443 </a> [<a href="/pdf/2503.14443" title="Download PDF" id="pdf-2503.14443" aria-labelledby="pdf-2503.14443">pdf</a>, <a href="https://arxiv.org/html/2503.14443v1" title="View HTML" id="html-2503.14443" aria-labelledby="html-2503.14443" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14443" title="Other formats" id="oth-2503.14443" aria-labelledby="oth-2503.14443">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> EnvBench: A Benchmark for Automated Environment Setup </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Eliseeva,+A">Aleksandra Eliseeva</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kovrigin,+A">Alexander Kovrigin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kholkin,+I">Ilia Kholkin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bogomolov,+E">Egor Bogomolov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zharov,+Y">Yaroslav Zharov</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at the DL4Code workshop at ICLR'25 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Software Engineering (cs.SE) </div> <p class='mathjax'> Recent advances in Large Language Models (LLMs) have enabled researchers to focus on practical repository-level tasks in the software engineering domain. In this work, we consider a cornerstone task for automating work with software repositories -- environment setup, i.e., the task of configuring a repository-specific development environment on a system. Existing studies on environment setup introduce innovative agentic strategies, but their evaluation is often based on small datasets that may not capture the full range of configuration challenges encountered in practice. To address this gap, we introduce EnvBench, a comprehensive environment setup benchmark. It encompasses 329 Python and 665 JVM-based (Java, Kotlin) repositories, with a focus on repositories that present genuine configuration challenges, excluding projects that can be fully configured by simple deterministic scripts. To enable further benchmark extension and usage for model tuning, we implement two automatic metrics: a static analysis check for missing imports in Python and a compilation check for JVM languages. We demonstrate the applicability of our benchmark by evaluating three environment setup approaches, including a simple zero-shot baseline and two agentic workflows, that we test with two powerful LLM backbones, GPT-4o and GPT-4o-mini. The best approach manages to successfully configure 6.69% of repositories for Python and 29.47% of repositories for JVM, suggesting that EnvBench remains challenging for current approaches. Our benchmark suite is publicly available at <a href="https://github.com/JetBrains-Research/EnvBench" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. The dataset and experiment trajectories are available at <a href="https://jb.gg/envbench" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
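<br><br>The Python-side metric (flagging imports that cannot be resolved in the configured environment) can be approximated with the standard library alone; a rough sketch under that interpretation, not the benchmark's actual checker: <pre>
import ast
import importlib.util

def missing_imports(source: str) -> set[str]:
    """Return top-level module names imported by `source` that cannot
    be resolved in the current environment."""
    modules = set()
    for node in ast.walk(ast.parse(source)):
        if isinstance(node, ast.Import):
            modules.update(alias.name.split(".")[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0:
            modules.add(node.module.split(".")[0])
    return {m for m in modules if importlib.util.find_spec(m) is None}

print(missing_imports("import numpy\nfrom nonexistent_pkg import thing\n"))
</pre>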
</p> </div> </dd> <dt> <a name='item80'>[80]</a> <a href ="/abs/2503.14476" title="Abstract" id="2503.14476"> arXiv:2503.14476 </a> [<a href="/pdf/2503.14476" title="Download PDF" id="pdf-2503.14476" aria-labelledby="pdf-2503.14476">pdf</a>, <a href="https://arxiv.org/html/2503.14476v1" title="View HTML" id="html-2503.14476" aria-labelledby="html-2503.14476" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14476" title="Other formats" id="oth-2503.14476" aria-labelledby="oth-2503.14476">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DAPO: An Open-Source LLM Reinforcement Learning System at Scale </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+Q">Qiying Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Z">Zheng Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+R">Ruofei Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yuan,+Y">Yufeng Yuan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zuo,+X">Xiaochen Zuo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yue,+Y">Yu Yue</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fan,+T">Tiantian Fan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+G">Gaohong Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+L">Lingjun Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+X">Xin Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+H">Haibin Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Z">Zhiqi Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+B">Bole Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sheng,+G">Guangming Sheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tong,+Y">Yuxuan Tong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+C">Chi Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+M">Mofan Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+W">Wang Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+H">Hang Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+J">Jinhua Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jiaze Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jiangjie Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+C">Chengyi Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+H">Hongli Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dai,+W">Weinan Dai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+Y">Yuxuan Song</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+X">Xiangpeng Wei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+H">Hao Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+J">Jingjing Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+W">Wei-Ying Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Ya-Qin Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yan,+L">Lin Yan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qiao,+M">Mu Qiao</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Y">Yonghui Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+M">Mingxuan Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Project Page: <a href="https://dapo-sia.github.io/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> Inference scaling empowers LLMs with unprecedented reasoning ability, with reinforcement learning as the core technique to elicit complex reasoning. However, key technical details of state-of-the-art reasoning LLMs are concealed (such as in OpenAI o1 blog and DeepSeek R1 technical report), thus the community still struggles to reproduce their RL training results. We propose the $\textbf{D}$ecoupled Clip and $\textbf{D}$ynamic s$\textbf{A}$mpling $\textbf{P}$olicy $\textbf{O}$ptimization ($\textbf{DAPO}$) algorithm, and fully open-source a state-of-the-art large-scale RL system that achieves 50 points on AIME 2024 using Qwen2.5-32B base model. Unlike previous works that withhold training details, we introduce four key techniques of our algorithm that make large-scale LLM RL a success. In addition, we open-source our training code, which is built on the verl framework, along with a carefully curated and processed dataset. These components of our open-source system enhance reproducibility and support future research in large-scale LLM RL. </p> </div> </dd> <dt> <a name='item81'>[81]</a> <a href ="/abs/2503.14481" title="Abstract" id="2503.14481"> arXiv:2503.14481 </a> [<a href="/pdf/2503.14481" title="Download PDF" id="pdf-2503.14481" aria-labelledby="pdf-2503.14481">pdf</a>, <a href="/format/2503.14481" title="Other formats" id="oth-2503.14481" aria-labelledby="oth-2503.14481">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Don't lie to your friends: Learning what you know from collaborative self-play </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Eisenstein,+J">Jacob Eisenstein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Aghajani,+R">Reza Aghajani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fisch,+A">Adam Fisch</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dua,+D">Dheeru Dua</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huot,+F">Fantine Huot</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lapata,+M">Mirella Lapata</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zayats,+V">Vicky Zayats</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Berant,+J">Jonathan Berant</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computation and Language (cs.CL) </div> <p class='mathjax'> To be helpful assistants, AI agents must be aware of their own capabilities and limitations. This includes knowing when to answer from parametric knowledge versus using tools, when to trust tool outputs, and when to abstain or hedge. Such capabilities are hard to teach through supervised fine-tuning because they require constructing examples that reflect the agent's specific capabilities. 
We therefore propose a radically new approach to teaching agents what they know: \emph{collaborative self-play}. We construct multi-agent collaborations in which the group is rewarded for collectively arriving at correct answers. The desired meta-knowledge emerges from the incentives built into the structure of the interaction. We focus on small societies of agents that have access to heterogeneous tools (corpus-specific retrieval), and therefore must collaborate to maximize their success while minimizing their effort. Experiments show that group-level rewards for multi-agent communities can induce policies that \emph{transfer} to improve tool use and selective prediction in settings where individual agents are deployed in isolation. </p> </div> </dd> </dl> <dl id='articles'> <h3>Cross submissions (showing 88 of 88 entries)</h3> <dt> <a name='item82'>[82]</a> <a href ="/abs/2409.13661" title="Abstract" id="2409.13661"> arXiv:2409.13661 </a> (cross-list from cs.SE) [<a href="/pdf/2409.13661" title="Download PDF" id="pdf-2409.13661" aria-labelledby="pdf-2409.13661">pdf</a>, <a href="https://arxiv.org/html/2409.13661v3" title="View HTML" id="html-2409.13661" aria-labelledby="html-2409.13661" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.13661" title="Other formats" id="oth-2409.13661" aria-labelledby="oth-2409.13661">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Efficient Domain Augmentation for Autonomous Driving Testing Using Diffusion Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Baresi,+L">Luciano Baresi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+D+Y+X">Davide Yi Xian Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stocco,+A">Andrea Stocco</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tonella,+P">Paolo Tonella</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted for publication at the 47th International Conference on Software Engineering (ICSE 2025). This research was partially supported by project EMELIOT, funded by MUR under the PRIN 2020 program (n. 2020W3A5FY), by the Bavarian Ministry of Economic Affairs, Regional Development and Energy, by the TUM Global Incentive Fund, and by the EU Project Sec4AI4Sec (n. 101120393) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Simulation-based testing is widely used to assess the reliability of Autonomous Driving Systems (ADS), but its effectiveness is limited by the operational design domain (ODD) conditions available in such simulators. To address this limitation, in this work, we explore the integration of generative artificial intelligence techniques with physics-based simulators to enhance ADS system-level testing. Our study evaluates the effectiveness and computational overhead of three generative strategies based on diffusion models, namely instruction-editing, inpainting, and inpainting with refinement. Specifically, we assess these techniques' capabilities to produce augmented simulator-generated images of driving scenarios representing new ODDs. 
We employ a novel automated detector for invalid inputs based on semantic segmentation to ensure semantic preservation and realism of the neurally generated images. We then perform system-level testing to evaluate the ADS's generalization ability to newly synthesized ODDs. Our findings show that diffusion models help increase the ODD coverage for system-level testing of ADS. Our automated semantic validator achieved a percentage of false positives as low as 3%, retaining the correctness and quality of the generated images for testing. Our approach successfully identified new ADS system failures before real-world testing. </p> </div> </dd> <dt> <a name='item83'>[83]</a> <a href ="/abs/2411.08553" title="Abstract" id="2411.08553"> arXiv:2411.08553 </a> (cross-list from cs.CL) [<a href="/pdf/2411.08553" title="Download PDF" id="pdf-2411.08553" aria-labelledby="pdf-2411.08553">pdf</a>, <a href="https://arxiv.org/html/2411.08553v1" title="View HTML" id="html-2411.08553" aria-labelledby="html-2411.08553" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.08553" title="Other formats" id="oth-2411.08553" aria-labelledby="oth-2411.08553">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CorrSynth -- A Correlated Sampling Method for Diverse Dataset Generation from LLMs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kowshik,+S+S">Suhas S Kowshik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Divekar,+A">Abhishek Divekar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Malik,+V">Vijit Malik</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published as a main conference paper at EMNLP 2024; First two authors contributed equally </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Large language models (LLMs) have demonstrated remarkable performance in diverse tasks using zero-shot and few-shot prompting. Even though their capabilities of data synthesis have been well studied in recent years, the generated data suffers from a lack of diversity, less adherence to the prompt, and potential biases that creep into the data from the generator model. In this work, we tackle the challenge of generating datasets with high diversity, upon which a student model is trained for downstream tasks. Taking the route of decoding-time guidance-based approaches, we propose CorrSynth, which generates data that is more diverse and faithful to the input prompt using a correlated sampling strategy. Further, our method overcomes the complexity drawbacks of some other guidance-based techniques like classifier-based guidance. With extensive experiments, we show the effectiveness of our approach and substantiate our claims. In particular, we perform intrinsic evaluation to show the improvements in diversity. Our experiments show that CorrSynth improves both student metrics and intrinsic metrics upon competitive baselines across four datasets, showing the innate advantage of our method. 
</p> </div> </dd> <dt> <a name='item84'>[84]</a> <a href ="/abs/2503.13463" title="Abstract" id="2503.13463"> arXiv:2503.13463 </a> (cross-list from cs.DL) [<a href="/pdf/2503.13463" title="Download PDF" id="pdf-2503.13463" aria-labelledby="pdf-2503.13463">pdf</a>, <a href="https://arxiv.org/html/2503.13463v1" title="View HTML" id="html-2503.13463" aria-labelledby="html-2503.13463" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13463" title="Other formats" id="oth-2503.13463" aria-labelledby="oth-2503.13463">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Completeness of Datasets Documentation on ML/AI repositories: an Empirical Investigation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Rondina,+M">Marco Rondina</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vetr%C3%B2,+A">Antonio Vetrò</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=De+Martin,+J+C">Juan Carlos De Martin</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Progress in Artificial Intelligence. EPIA 2023. Lecture Notes in Computer Science, vol 14115. Springer, Cham </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Digital Libraries (cs.DL)</span>; Artificial Intelligence (cs.AI); Human-Computer Interaction (cs.HC); Machine Learning (cs.LG) </div> <p class='mathjax'> ML/AI is the field of computer science and computer engineering that arguably received the most attention and funding over the last decade. Data is the key element of ML/AI, so it is becoming increasingly important to ensure that users are fully aware of the quality of the datasets that they use, and of the process generating them, so that possible negative downstream impacts can be tracked, analysed, and, where possible, mitigated. One of the tools that can be useful from this perspective is dataset documentation. The aim of this work is to investigate the state of dataset documentation practices, measuring the completeness of the documentation of several popular datasets in ML/AI repositories. We created a dataset documentation schema -- the Documentation Test Sheet (DTS) -- that identifies the information that should always be attached to a dataset (to ensure proper dataset choice and informed use), according to relevant studies in the literature. We verified 100 popular datasets from four different repositories with the DTS to investigate which information was present. Overall, we observed a lack of relevant documentation, especially about the context of data collection and data processing, highlighting a paucity of transparency.
</p> </div> </dd> <dt> <a name='item85'>[85]</a> <a href ="/abs/2503.13465" title="Abstract" id="2503.13465"> arXiv:2503.13465 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13465" title="Download PDF" id="pdf-2503.13465" aria-labelledby="pdf-2503.13465">pdf</a>, <a href="https://arxiv.org/html/2503.13465v1" title="View HTML" id="html-2503.13465" aria-labelledby="html-2503.13465" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13465" title="Other formats" id="oth-2503.13465" aria-labelledby="oth-2503.13465">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A novel Fourier Adjacency Transformer for advanced EEG emotion recognition </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+J">Jinfeng Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Huang,+Y">Yanhao Huang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Song,+S">Sifan Song</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+B">Boqian Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Su,+J">Jionglong Su</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Ding,+J">Jiaman Ding</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Neurons and Cognition (q-bio.NC) </div> <p class='mathjax'> EEG emotion recognition faces significant hurdles due to noise interference, signal nonstationarity, and the inherent complexity of brain activity, which together make accurate emotion classification difficult. In this study, we present the Fourier Adjacency Transformer, a novel framework that seamlessly integrates Fourier-based periodic analysis with graph-driven structural modeling. Our method first leverages novel Fourier-inspired modules to extract periodic features from embedded EEG signals, effectively decoupling them from aperiodic components. Subsequently, we employ an adjacency attention scheme to reinforce universal inter-channel correlation patterns, coupling these patterns with their sample-based counterparts. Empirical evaluations on SEED and DEAP datasets demonstrate that our method surpasses existing state-of-the-art techniques, achieving an improvement of approximately 6.5% in recognition accuracy. By unifying periodicity and structural insights, this framework offers a promising direction for future research in EEG emotion analysis.
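</p> <p class='mathjax'> A rough numpy sketch of the two ingredients named above, under assumed shapes (32 channels, 256 samples, 16 frequency bins) and with random projections standing in for the model's learned modules: an FFT step that isolates low-frequency periodic energy per channel, and an attention map biased by an inter-channel adjacency pattern. </p> <pre>
# Toy sketch: FFT-based periodic features + adjacency-biased attention.
# Shapes and projections are assumptions, not the paper's architecture.
import numpy as np

rng = np.random.default_rng(1)
C, T, D = 32, 256, 16                            # channels, samples, feature dim (assumed)
eeg = rng.normal(size=(C, T))

spectra = np.abs(np.fft.rfft(eeg, axis=1))       # periodic content per channel
periodic = spectra[:, 1:D + 1]                   # drop DC, keep D low-frequency bins

q = periodic @ rng.normal(size=(D, D))           # toy query/key projections
k = periodic @ rng.normal(size=(D, D))
scores = q @ k.T / np.sqrt(D)
adjacency = np.abs(np.corrcoef(eeg))             # universal inter-channel pattern
scores = scores + np.log(adjacency + 1e-6)       # couple attention with adjacency
attn = np.exp(scores - scores.max(axis=1, keepdims=True))
attn /= attn.sum(axis=1, keepdims=True)
fused = attn @ periodic                          # channel-mixed periodic features
print(fused.shape)                               # (32, 16)
</pre> <p class='mathjax'>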
</p> </div> </dd> <dt> <a name='item86'>[86]</a> <a href ="/abs/2503.13468" title="Abstract" id="2503.13468"> arXiv:2503.13468 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13468" title="Download PDF" id="pdf-2503.13468" aria-labelledby="pdf-2503.13468">pdf</a>, <a href="https://arxiv.org/html/2503.13468v1" title="View HTML" id="html-2503.13468" aria-labelledby="html-2503.13468" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13468" title="Other formats" id="oth-2503.13468" aria-labelledby="oth-2503.13468">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A CGAN-LSTM-Based Framework for Time-Varying Non-Stationary Channel Modeling </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Guo,+K">Keying Guo</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=He,+R">Ruisi He</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yang,+M">Mi Yang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+Y">Yuxin Zhang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Ai,+B">Bo Ai</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+H">Haoxiang Zhang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Han,+J">Jiahui Han</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chen,+R">Ruifeng Chen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages, 7 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Time-varying non-stationary channels, with complex dynamic variations and temporal evolution characteristics, pose significant challenges for channel modeling and communication system performance evaluation. Most existing methods of time-varying channel modeling focus on predicting the channel state at a given moment or simulating short-term channel fluctuations, and are thus unable to capture the long-term evolution of the channel. This paper emphasizes the generation of long-term dynamic channels to fully capture the evolution of non-stationary channel properties. The generated channels not only reflect temporal dynamics but also ensure consistent stationarity. We propose a hybrid deep learning framework that combines conditional generative adversarial networks (CGAN) with long short-term memory (LSTM) networks. A stationarity-constrained approach is designed to ensure the temporal correlation of the generated time-series channel. This method can generate channels with the required temporal non-stationarity. The model is validated by comparing channel statistical features, and the results show that the generated channel is in good agreement with the raw channel and provides good performance in terms of non-stationarity.
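</p> <p class='mathjax'> The abstract does not spell out the stationarity constraint itself; a minimal stand-in, assuming a penalty on drifting short-window power added to the generator's loss, could look as follows. </p> <pre>
# Hypothetical stationarity-style penalty: the variance of per-window mean
# power is ~0 for a stationary series and grows when statistics drift.
# The window length and the penalty form are assumptions.
import numpy as np

def stationarity_penalty(h, win=64):
    """Variance of per-window mean power of a generated channel series."""
    windows = h[: len(h) // win * win].reshape(-1, win)
    power = (np.abs(windows) ** 2).mean(axis=1)
    return power.var()

rng = np.random.default_rng(2)
stationary = rng.normal(size=4096)                          # constant statistics
drifting = rng.normal(size=4096) * np.linspace(1, 3, 4096)  # time-varying power

print(stationarity_penalty(stationary))  # small
print(stationarity_penalty(drifting))    # much larger -> would be penalized
</pre> <p class='mathjax'>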
</p> </div> </dd> <dt> <a name='item87'>[87]</a> <a href ="/abs/2503.13469" title="Abstract" id="2503.13469"> arXiv:2503.13469 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13469" title="Download PDF" id="pdf-2503.13469" aria-labelledby="pdf-2503.13469">pdf</a>, <a href="https://arxiv.org/html/2503.13469v1" title="View HTML" id="html-2503.13469" aria-labelledby="html-2503.13469" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13469" title="Other formats" id="oth-2503.13469" aria-labelledby="oth-2503.13469">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Conditional Electrocardiogram Generation Using Hierarchical Variational Autoencoders </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Sviridov,+I">Ivan Sviridov</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Egorov,+K">Konstantin Egorov</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages, 6 figures, 7 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Cardiovascular diseases (CVDs) are disorders impacting the heart and circulatory system. These disorders are the foremost and continuously escalating cause of mortality worldwide. One of the main tasks when working with CVDs is analyzing and identifying pathologies on a 12-lead electrocardiogram (ECG) with a standard 10-second duration. Using machine learning (ML) in automatic ECG analysis increases CVD diagnostics' availability, speed, and accuracy. However, the most significant difficulty in developing ML models is obtaining a sufficient training dataset. Given the limitations of medical data usage, such as high cost, errors, ambiguous labels, class imbalance, and privacy issues, utilizing synthetic samples conditioned on specific pathologies bypasses these restrictions and improves algorithm quality. Existing solutions for the conditional generation of ECG signals are mainly built on Generative Adversarial Networks (GANs), and only a few papers consider architectures based on Variational Autoencoders (VAEs), which have shown comparable results in recent works. This paper proposes the publicly available conditional Nouveau VAE model for ECG signal generation (cNVAE-ECG), which produces high-resolution ECGs with multiple pathologies. We provide an extensive comparison of the proposed model on various practical downstream tasks, including transfer learning scenarios, showing an area under the receiver operating characteristic curve (AUROC) increase of up to 2%, surpassing GAN-like competitors.
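</p> <p class='mathjax'> A minimal conditional-VAE forward pass, with single linear maps standing in for the hierarchical Nouveau VAE and assumed sizes (a 128-sample segment, 8 latent dimensions, 5 pathology classes), shows how a label conditions both encoding and generation: </p> <pre>
# Toy conditional VAE step; all sizes and the linear encoder/decoder are
# assumptions, not the cNVAE-ECG architecture.
import numpy as np

rng = np.random.default_rng(3)
T, Z, N_CLASSES = 128, 8, 5
W_enc = rng.normal(scale=0.1, size=(T + N_CLASSES, 2 * Z))
W_dec = rng.normal(scale=0.1, size=(Z + N_CLASSES, T))

def one_hot(c):
    v = np.zeros(N_CLASSES); v[c] = 1.0; return v

x, c = rng.normal(size=T), one_hot(2)                 # ECG segment + pathology label
stats = np.concatenate([x, c]) @ W_enc                # label conditions the encoder
mu, log_var = stats[:Z], stats[Z:]
z = mu + np.exp(0.5 * log_var) * rng.normal(size=Z)   # reparameterization trick
x_hat = np.concatenate([z, c]) @ W_dec                # label conditions the decoder

recon = ((x - x_hat) ** 2).mean()                     # reconstruction term
kl = 0.5 * (np.exp(log_var) + mu**2 - 1 - log_var).sum()  # KL to standard normal
print(recon + kl)                                     # toy ELBO-style loss
</pre> <p class='mathjax'>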
</p> </div> </dd> <dt> <a name='item88'>[88]</a> <a href ="/abs/2503.13470" title="Abstract" id="2503.13470"> arXiv:2503.13470 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13470" title="Download PDF" id="pdf-2503.13470" aria-labelledby="pdf-2503.13470">pdf</a>, <a href="https://arxiv.org/html/2503.13470v1" title="View HTML" id="html-2503.13470" aria-labelledby="html-2503.13470" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13470" title="Other formats" id="oth-2503.13470" aria-labelledby="oth-2503.13470">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multimodal Lead-Specific Modeling of ECG for Low-Cost Pulmonary Hypertension Assessment </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Suvon,+M+N+I">Mohammod N. I. Suvon</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhou,+S">Shuo Zhou</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tripathi,+P+C">Prasun C. Tripathi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Fan,+W">Wenrui Fan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Alabed,+S">Samer Alabed</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Khanal,+B">Bishesh Khanal</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Osmani,+V">Venet Osmani</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Swift,+A+J">Andrew J. Swift</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chen">Chen (Cherise) Chen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lu,+H">Haiping Lu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Pulmonary hypertension (PH) is frequently underdiagnosed in low- and middle-income countries (LMICs) primarily due to the scarcity of advanced diagnostic tools. Several studies in PH have applied machine learning to low-cost diagnostic tools like 12-lead ECG (12L-ECG), but they mainly focus on areas with limited resources, overlooking areas with no diagnostic tools, such as rural primary healthcare in LMICs. Recent studies have shown the effectiveness of 6-lead ECG (6L-ECG) as a cheaper and portable alternative for detecting various cardiac conditions, but its clinical value for PH detection has not been well established. Furthermore, existing methods treat 12L-/6L-ECG as a single modality, capturing only shared features while overlooking lead-specific features essential for identifying complex cardiac hemodynamic changes. In this paper, we propose Lead-Specific Electrocardiogram Multimodal Variational Autoencoder (LS-EMVAE), a model pre-trained on large-population 12L-ECG data and fine-tuned on task-specific data (12L-ECG or 6L-ECG). LS-EMVAE models each 12L-ECG lead as a separate modality and introduces a hierarchical expert composition using Mixture and Product of Experts for adaptive latent feature fusion between lead-specific and shared features. Unlike existing approaches, LS-EMVAE makes better predictions on both 12L-ECG and 6L-ECG at inference, making it an equitable solution for areas with limited or no diagnostic tools.
We pre-trained LS-EMVAE on 800,000 publicly available 12L-ECG samples and fine-tuned it for two tasks: 1) PH detection and 2) phenotyping pre-/post-capillary PH, on in-house datasets of 892 and 691 subjects across 12L-ECG and 6L-ECG settings. Extensive experiments show that LS-EMVAE outperforms existing baselines in both ECG settings, while 6L-ECG achieves performance comparable to 12L-ECG, unlocking its potential for global PH screening in areas without diagnostic tools. </p> </div> </dd> <dt> <a name='item89'>[89]</a> <a href ="/abs/2503.13475" title="Abstract" id="2503.13475"> arXiv:2503.13475 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13475" title="Download PDF" id="pdf-2503.13475" aria-labelledby="pdf-2503.13475">pdf</a>, <a href="/format/2503.13475" title="Other formats" id="oth-2503.13475" aria-labelledby="oth-2503.13475">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Cross-Subject Depression Level Classification Using EEG Signals with a Sample Confidence Method </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+Z">ZhongYi Zhang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Xu,+C">ChenYang Xu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhao,+L">LiXuan Zhao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Hou,+H">HuiRang Hou</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Meng,+Q">QingHao Meng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Electroencephalogram (EEG) is a non-invasive tool for real-time neural monitoring, widely used in depression detection via deep learning. However, existing models primarily focus on binary classification (depression/normal), lacking granularity for severity assessment. To address this, we proposed DepL-GCN, a Depression Level classification model based on a graph convolutional network (GCN). This model tackles two key challenges: (1) subjectivity in depression-level labeling due to patient self-report biases, and (2) class imbalance across severity categories. Inspired by the model learning patterns, we introduced two novel modules: the sample confidence module and the minority sample penalty module. The former leverages the L2-norm of prediction errors to progressively filter EEG samples with weak label alignment during training, thereby reducing the impact of subjectivity; the latter automatically upweights misclassified minority-class samples to address imbalance issues. After testing on two public EEG datasets, DepL-GCN achieved accuracies of 81.13% and 81.36% for multi-class severity recognition, outperforming baseline models. Ablation studies confirmed both modules' contributions. We further discussed the strengths and limitations of regression-based models for depression-level recognition.
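</p> <p class='mathjax'> The two modules lend themselves to a compact sketch, assuming softmax outputs `p`, one-hot labels `y`, a filtering quantile, and a fixed upweighting factor; the quantile and factor are assumed hyperparameters, and the paper's progressive filtering schedule is not reproduced here. </p> <pre>
# Sketch of the sample confidence module (L2-norm error filter) and the
# minority sample penalty module (upweight misclassified minority samples).
import numpy as np

def sample_weights(p, y, counts, keep=0.9, factor=2.0):
    err = np.linalg.norm(p - y, axis=1)        # L2-norm of prediction error
    w = np.ones(len(p))
    w[err > np.quantile(err, keep)] = 0.0      # confidence: drop weak-label samples
    pred, true = p.argmax(1), y.argmax(1)
    minority = counts[true] < counts.max()     # minority-class membership
    w[(pred != true) & minority] *= factor     # penalty: upweight hard minority cases
    return w

rng = np.random.default_rng(4)
n, k = 12, 3
p = rng.dirichlet(np.ones(k), size=n)          # toy softmax outputs
y = np.eye(k)[rng.integers(0, k, size=n)]      # toy one-hot labels
print(sample_weights(p, y, y.sum(0)))
</pre> <p class='mathjax'>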
</p> </div> </dd> <dt> <a name='item90'>[90]</a> <a href ="/abs/2503.13476" title="Abstract" id="2503.13476"> arXiv:2503.13476 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13476" title="Download PDF" id="pdf-2503.13476" aria-labelledby="pdf-2503.13476">pdf</a>, <a href="https://arxiv.org/html/2503.13476v1" title="View HTML" id="html-2503.13476" aria-labelledby="html-2503.13476" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13476" title="Other formats" id="oth-2503.13476" aria-labelledby="oth-2503.13476">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Radar Pulse Deinterleaving with Transformer Based Deep Metric Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Gunn,+E">Edward Gunn</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Hosford,+A">Adam Hosford</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mannion,+D">Daniel Mannion</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Williams,+J">Jarrod Williams</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Chhabra,+V">Varun Chhabra</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Nockles,+V">Victoria Nockles</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Preprint: Accepted to IEEE International Radar Conference 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> When receiving radar pulses it is common for a recorded pulse train to contain pulses from many different emitters. The radar pulse deinterleaving problem is the task of separating out these pulses by the emitter from which they originated. Notably, the number of emitters in any particular recorded pulse train is considered unknown. In this paper, we define the problem and present metrics that can be used to measure model performance. We propose a metric learning approach to this problem using a transformer trained with the triplet loss on synthetic data. This model achieves strong results in comparison with other deep learning models with an adjusted mutual information score of 0.882. 
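</p> <p class='mathjax'> The two quantitative ingredients here, the triplet margin loss used for training and the adjusted mutual information used for scoring, can be sketched directly; the transformer encoder is replaced by ready-made embedding vectors, and the margin value is an assumption. </p> <pre>
# Triplet margin loss on pulse embeddings + the clustering metric the
# abstract reports. The embeddings below are random stand-ins.
import numpy as np
from sklearn.metrics import adjusted_mutual_info_score

def triplet_loss(anchor, positive, negative, margin=0.5):
    d_pos = np.linalg.norm(anchor - positive)   # same-emitter distance
    d_neg = np.linalg.norm(anchor - negative)   # different-emitter distance
    return max(0.0, d_pos - d_neg + margin)     # pull together, push apart

rng = np.random.default_rng(5)
emb = rng.normal(size=(3, 16))                  # anchor / positive / negative
print(triplet_loss(emb[0], emb[0] + 0.1 * emb[1], emb[2]))

# Deinterleaving quality: compare predicted pulse clusters to true emitters;
# AMI is invariant to label permutation, so this scores 1.0.
print(adjusted_mutual_info_score([0, 0, 1, 1, 2], [1, 1, 0, 0, 2]))
</pre> <p class='mathjax'>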
</p> </div> </dd> <dt> <a name='item91'>[91]</a> <a href ="/abs/2503.13480" title="Abstract" id="2503.13480"> arXiv:2503.13480 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13480" title="Download PDF" id="pdf-2503.13480" aria-labelledby="pdf-2503.13480">pdf</a>, <a href="https://arxiv.org/html/2503.13480v1" title="View HTML" id="html-2503.13480" aria-labelledby="html-2503.13480" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13480" title="Other formats" id="oth-2503.13480" aria-labelledby="oth-2503.13480">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> WVEmbs with its Masking: A Method For Radar Signal Sorting </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Hu,+X">Xianan Hu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+F">Fu Li</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Niu,+K">Kairui Niu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Qi,+P">Peihan Qi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Liang,+Z">Zhiyong Liang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Our study proposes a novel embedding method, Wide-Value-Embeddings (WVEmbs), for processing Pulse Descriptor Words (PDWs) as normalized inputs to neural networks. This method adapts to the distribution of interleaved radar signals, ranking original signal features from trivial to useful and stabilizing the learning process. To address the imbalance in radar signal interleaving, we introduce a value dimension masking method on WVEmbs, which automatically and efficiently generates challenging samples, and constructs interleaving scenarios, thereby compelling the model to learn robust features. Experimental results demonstrate that our method is an efficient end-to-end approach, achieving high-granularity, sample-level pulse sorting for high-density interleaved radar pulse sequences in complex and non-ideal environments. </p> </div> </dd> <dt> <a name='item92'>[92]</a> <a href ="/abs/2503.13485" title="Abstract" id="2503.13485"> arXiv:2503.13485 </a> (cross-list from cs.DL) [<a href="/pdf/2503.13485" title="Download PDF" id="pdf-2503.13485" aria-labelledby="pdf-2503.13485">pdf</a>, <a href="https://arxiv.org/html/2503.13485v1" title="View HTML" id="html-2503.13485" aria-labelledby="html-2503.13485" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13485" title="Other formats" id="oth-2503.13485" aria-labelledby="oth-2503.13485">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Causal Inference Approach for Quantifying Research Impact </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ochiai,+K">Keiichi Ochiai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Matsuo,+Y">Yutaka Matsuo</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Digital Libraries (cs.DL)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Deep learning has had a great impact on various fields of computer science by enabling data-driven representation learning over the past decade.
Because science and technology policy decisions for a nation can be made based on the impact of each technology, quantifying research impact is an important task. The number of citations and the impact factor can be used to measure the impact of individual research. What would have happened without the research, however, is fundamentally a counterfactual phenomenon. Thus, we propose an approach based on causal inference to quantify the research impact of a specific technical topic. We leverage difference-in-difference to quantify the research impact by applying it to bibliometric data. First, we identify papers on a specific technical topic using keywords or category tags from Microsoft Academic Graph, which is one of the largest academic publication datasets. Next, we build a paper citation network between technical fields. Then, we aggregate the cross-field citation count for each research field. Finally, the impact of a specific technical topic for each research field is estimated by applying difference-in-difference. Evaluation results show that deep learning significantly affects computer vision and natural language processing. In addition, deep learning significantly affects cross-field citations, especially from speech recognition to computer vision and from natural language processing to computer vision. Moreover, our method revealed that the impact of deep learning was 3.1 times that of interpretability for ML models. </p> </div> </dd> <dt> <a name='item93'>[93]</a> <a href ="/abs/2503.13486" title="Abstract" id="2503.13486"> arXiv:2503.13486 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13486" title="Download PDF" id="pdf-2503.13486" aria-labelledby="pdf-2503.13486">pdf</a>, <a href="https://arxiv.org/html/2503.13486v1" title="View HTML" id="html-2503.13486" aria-labelledby="html-2503.13486" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13486" title="Other formats" id="oth-2503.13486" aria-labelledby="oth-2503.13486">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Machine learning for triage of strokes with large vessel occlusion using photoplethysmography biomarkers </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Goda,+M+%C3%81">Márton Á. Goda</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Badge,+H">Helen Badge</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Khan,+J">Jasmeen Khan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Solewicz,+Y">Yosef Solewicz</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Davoodi,+M">Moran Davoodi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Teramayi,+R">Rumbidzai Teramayi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Cordato,+D">Dennis Cordato</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lin,+L">Longting Lin</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Christie,+L">Lauren Christie</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Blair,+C">Christopher Blair</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Sharma,+G">Gagan Sharma</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Parsons,+M">Mark Parsons</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Behar,+J+A">Joachim A. Behar</a></div>
<div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Objective. Large vessel occlusion (LVO) stroke presents a major challenge in clinical practice due to the potential for poor outcomes with delayed treatment. Treatment for LVO involves highly specialized care, in particular endovascular thrombectomy, and is available only at certain hospitals. Therefore, prehospital identification of LVO by emergency ambulance services can be critical for triaging LVO stroke patients directly to a hospital with access to endovascular therapy. Clinical scores exist to help distinguish LVO from less severe strokes, but they are based on a series of examinations that can take minutes and may be impractical for patients with dementia or those who cannot follow commands due to their stroke. There is a need for a fast and reliable method to aid in the early identification of LVO. In this study, our objective was to assess the feasibility of using a 30-second photoplethysmography (PPG) recording to assist in recognizing LVO stroke. Method. A total of 88 patients, including 25 with LVO, 27 with stroke mimic (SM), and 36 non-LVO stroke patients (NL), were recorded at the Liverpool Hospital emergency department in Sydney, Australia. Demographics (age, sex), as well as morphological features and beating rate variability measures, were extracted from the PPG. A binary classification approach was employed to differentiate between LVO stroke and NL+SM (NL.SM). A 2:1 train-test split was stratified and repeated randomly across 100 iterations. Results. The best model achieved a median test set area under the receiver operating characteristic curve (AUROC) of 0.77 (0.71--0.82). Conclusion. Our study demonstrates the potential of utilizing a 30-second PPG recording for identifying LVO stroke. </p> </div> </dd> <dt> <a name='item94'>[94]</a> <a href ="/abs/2503.13487" title="Abstract" id="2503.13487"> arXiv:2503.13487 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13487" title="Download PDF" id="pdf-2503.13487" aria-labelledby="pdf-2503.13487">pdf</a>, <a href="https://arxiv.org/html/2503.13487v1" title="View HTML" id="html-2503.13487" aria-labelledby="html-2503.13487" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13487" title="Other formats" id="oth-2503.13487" aria-labelledby="oth-2503.13487">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Statistical Study of Sensor Data and Investigation of ML-based Calibration Algorithms for Inexpensive Sensor Modules: Experiments from Cape Point </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Barrett,+T">Travis Barrett</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mishra,+A+K">Amit Kumar Mishra</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG); Systems and Control (eess.SY) </div> <p class='mathjax'> In this paper we present the statistical analysis of data from inexpensive sensors. We also present the performance of machine learning algorithms when used for the automatic calibration of such sensors.
In this work, we used a low-cost Non-Dispersive Infrared CO$_2$ sensor placed at a co-located site at Cape Point, South Africa (maintained by Weather South Africa). The collected low-cost sensor data and site truth data are investigated and compared. We compare and investigate the performance of Random Forest Regression, Support Vector Regression, 1D Convolutional Neural Network and 1D-CNN Long Short-Term Memory Network models as methods for automatic calibration, and the statistical properties of these models' predictions. In addition, we also investigate the drift in performance of these algorithms over time. </p> </div> </dd> <dt> <a name='item95'>[95]</a> <a href ="/abs/2503.13490" title="Abstract" id="2503.13490"> arXiv:2503.13490 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13490" title="Download PDF" id="pdf-2503.13490" aria-labelledby="pdf-2503.13490">pdf</a>, <a href="https://arxiv.org/html/2503.13490v1" title="View HTML" id="html-2503.13490" aria-labelledby="html-2503.13490" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13490" title="Other formats" id="oth-2503.13490" aria-labelledby="oth-2503.13490">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Cascade of one-class classifier ensemble and dynamic naive Bayes classifier applied to the myoelectric-based upper limb prosthesis control with contaminated channels detection </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Trajdos,+P">Pawel Trajdos</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Kurzynski,+M">Marek Kurzynski</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Modern upper limb bioprostheses are typically controlled by sEMG signals using a pattern recognition scheme in the control process. Unfortunately, the sEMG signal is very susceptible to contamination that deteriorates the quality of the control system and reduces the usefulness of the prosthesis in the patient's everyday life. In the paper, the authors propose a new recognition system intended for sEMG-based control of the hand prosthesis with detection of contaminated sEMG signals. The originality of the proposed solution lies in the co-operation of two recognition systems working in a cascade structure: (1) an ensemble of one-class classifiers used to recognise contaminated signals and (2) a naive Bayes classifier (NBC) which recognises the patient's intentions using the information about contaminations produced by the ensemble. Although the NBC model is changed dynamically in the proposed approach, training can be performed in a one-shot procedure due to the multiplicative form of the classification functions. Experimental studies were conducted using real sEMG signals. The results obtained confirm the hypothesis that the use of the one-class classifier ensemble and the dynamic NBC model leads to improved classification quality.
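</p> <p class='mathjax'> The multiplicative trick can be sketched compactly: assuming a toy Gaussian naive Bayes channel model and a simple thresholding stand-in for the one-class ensemble, excluding a flagged channel just drops its factor from the product (a summand in log space), so no retraining is needed. </p> <pre>
# Cascade sketch under assumed interfaces: flag contaminated sEMG channels,
# then form the naive Bayes decision over the clean channels only.
import numpy as np

rng = np.random.default_rng(6)
N_CH, N_CLASSES = 8, 4
mu = rng.normal(size=(N_CLASSES, N_CH))          # per-class channel means (toy model)

def clean_mask(x, threshold=3.0):
    return np.abs(x) < threshold                 # stand-in for the one-class ensemble

def classify(x):
    mask = clean_mask(x)
    loglik = -0.5 * (x - mu) ** 2                # Gaussian log-likelihood per channel
    scores = (loglik * mask).sum(axis=1)         # sum over clean channels only
    return scores.argmax(), mask

x = rng.normal(size=N_CH)
x[3] = 25.0                                      # contaminate one channel
print(classify(x))                               # channel 3 excluded from the decision
</pre> <p class='mathjax'>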
</p> </div> </dd> <dt> <a name='item96'>[96]</a> <a href ="/abs/2503.13491" title="Abstract" id="2503.13491"> arXiv:2503.13491 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13491" title="Download PDF" id="pdf-2503.13491" aria-labelledby="pdf-2503.13491">pdf</a>, <a href="https://arxiv.org/html/2503.13491v1" title="View HTML" id="html-2503.13491" aria-labelledby="html-2503.13491" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13491" title="Other formats" id="oth-2503.13491" aria-labelledby="oth-2503.13491">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FLP-XR: Future Location Prediction on Extreme Scale Maritime Data in Real-time </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Theodoropoulos,+G+S">George S. Theodoropoulos</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Patakis,+A">Andreas Patakis</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tritsarolis,+A">Andreas Tritsarolis</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Theodoridis,+Y">Yannis Theodoridis</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Movements of maritime vessels are inherently complex and challenging to model due to the dynamic and often unpredictable nature of maritime operations. Even within structured maritime environments, such as shipping lanes and port approaches, where vessels adhere to navigational rules and predefined sea routes, uncovering underlying patterns is far from trivial. The necessity for accurate modeling of the mobility of maritime vessels arises from the numerous applications it serves, including risk assessment for collision avoidance, optimization of shipping routes, and efficient port management. This paper introduces FLP-XR, a model that leverages maritime mobility data to construct a robust framework that offers precise predictions while ensuring extremely fast training and inference capabilities. We demonstrate the efficiency of our approach through an extensive experimental study using three real-world AIS datasets. According to the experimental results, FLP-XR outperforms the current state-of-the-art in many cases, whereas it performs 2-3 orders of magnitude faster in terms of training and inference. 
</p> </div> </dd> <dt> <a name='item97'>[97]</a> <a href ="/abs/2503.13493" title="Abstract" id="2503.13493"> arXiv:2503.13493 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13493" title="Download PDF" id="pdf-2503.13493" aria-labelledby="pdf-2503.13493">pdf</a>, <a href="/format/2503.13493" title="Other formats" id="oth-2503.13493" aria-labelledby="oth-2503.13493">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Analysis of Learning-based Offshore Wind Power Prediction Models with Various Feature Combinations </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Fang,+L">Linhan Fang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Jiang,+F">Fan Jiang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Toms,+A+M">Ann Mary Toms</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+X">Xingpeng Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG); Applications (stat.AP) </div> <p class='mathjax'> Accurate wind speed prediction is crucial for designing and selecting sites for offshore wind farms. This paper investigates the effectiveness of various machine learning models in predicting offshore wind power for a site near the Gulf of Mexico by analyzing meteorological data. After collecting and preprocessing meteorological data, nine different input feature combinations were designed to assess their impact on wind power predictions at multiple heights. The results show that using wind speed as the output feature improves prediction accuracy by approximately 10% compared to using wind power as the output. In addition, the improvement of multi-feature input compared with single-feature input is not obvious mainly due to the poor correlation among key features and limited generalization ability of models. These findings underscore the importance of selecting appropriate output features and highlight considerations for using machine learning in wind power forecasting, offering insights that could guide future wind power prediction models and conversion techniques. 
</p> </div> </dd> <dt> <a name='item98'>[98]</a> <a href ="/abs/2503.13495" title="Abstract" id="2503.13495"> arXiv:2503.13495 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13495" title="Download PDF" id="pdf-2503.13495" aria-labelledby="pdf-2503.13495">pdf</a>, <a href="https://arxiv.org/html/2503.13495v1" title="View HTML" id="html-2503.13495" aria-labelledby="html-2503.13495" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13495" title="Other formats" id="oth-2503.13495" aria-labelledby="oth-2503.13495">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> TransECG: Leveraging Transformers for Explainable ECG Re-identification Risk Analysis </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+Z">Ziyu Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Khatibi,+E">Elahe Khatibi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Kazemi,+K">Kianoosh Kazemi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Azimi,+I">Iman Azimi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mousavi,+S">Sanaz Mousavi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Malik,+S">Shaista Malik</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Rahmani,+A+M">Amir M. Rahmani</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Electrocardiogram (ECG) signals are widely shared across multiple clinical applications for diagnosis, health monitoring, and biometric authentication. While valuable for healthcare, they also carry unique biometric identifiers that pose privacy risks, especially when ECG data is shared across multiple entities. These risks are amplified in shared environments, where re-identification threats can compromise patient privacy. Existing deep learning re-identification models prioritize accuracy but lack explainability, making it challenging to understand how the unique biometric characteristics encoded within ECG signals are recognized and utilized for identification. Without these insights, despite high accuracy, developing secure and trustworthy ECG data-sharing frameworks remains difficult, especially in diverse, multi-source environments. In this work, we introduce TransECG, a Vision Transformer (ViT)-based method that uses attention mechanisms to pinpoint critical ECG segments associated with re-identification tasks like gender, age, and participant ID. Our approach demonstrates high accuracy (89.9% for gender, 89.9% for age, and 88.6% for ID re-identification) across four real-world datasets with 87 participants. Importantly, we provide key insights into ECG components such as the R-wave, QRS complex, and P-Q interval in re-identification. For example, in gender classification, the R wave contributed 58.29% to the model's attention, while in age classification, the P-R interval contributed 46.29%. By combining high predictive performance with enhanced explainability, TransECG provides a robust solution for privacy-conscious ECG data sharing, supporting the development of secure and trusted healthcare data environments.
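</p> <p class='mathjax'> A toy version of attention-based attribution, under assumed shapes (4 layers, 4 heads, a [CLS] token plus five labelled ECG segments): average the attention the class token pays to each segment and report per-segment shares, mirroring the percentages quoted above. The random attention maps stand in for a trained ViT's. </p> <pre>
# Attention-share attribution sketch; shapes, segment labels, and the
# head/layer averaging rule are assumptions, not TransECG's exact method.
import numpy as np

rng = np.random.default_rng(8)
LAYERS, HEADS, TOKENS = 4, 4, 6                  # token 0 = [CLS], 1..5 = ECG segments
attn = rng.dirichlet(np.ones(TOKENS), size=(LAYERS, HEADS, TOKENS))

cls_attn = attn.mean(axis=(0, 1))[0]             # mean attention row for [CLS]
segments = ["P", "P-Q", "QRS", "S-T", "T"]       # assumed segment labelling
shares = cls_attn[1:] / cls_attn[1:].sum()
for seg, share in zip(segments, shares):
    print(f"{seg}: {100 * share:.1f}% of [CLS] attention")
</pre> <p class='mathjax'>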
</p> </div> </dd> <dt> <a name='item99'>[99]</a> <a href ="/abs/2503.13496" title="Abstract" id="2503.13496"> arXiv:2503.13496 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13496" title="Download PDF" id="pdf-2503.13496" aria-labelledby="pdf-2503.13496">pdf</a>, <a href="https://arxiv.org/html/2503.13496v1" title="View HTML" id="html-2503.13496" aria-labelledby="html-2503.13496" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13496" title="Other formats" id="oth-2503.13496" aria-labelledby="oth-2503.13496">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Finger-to-Chest Style Transfer-assisted Deep Learning Method For Photoplethysmogram Waveform Restoration with Timing Preservation </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Pagotto,+S+M">Sara Maria Pagotto</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tognoni,+F">Federico Tognoni</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Rossi,+M">Matteo Rossi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Bovio,+D">Dario Bovio</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Salito,+C">Caterina Salito</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mainardi,+L">Luca Mainardi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Cerveri,+P">Pietro Cerveri</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG); Quantitative Methods (q-bio.QM) </div> <p class='mathjax'> Wearable measurements, such as those obtained by photoplethysmogram (PPG) sensors, are highly susceptible to motion artifacts and noise, affecting cardiovascular measures. Chest-acquired PPG signals are especially vulnerable, with signal degradation primarily resulting from lower perfusion, breathing-induced motion, and mechanical interference from chest movements. Traditional restoration methods often degrade the signal, and supervised deep learning (DL) struggles with random and systematic distortions, requiring very large datasets for successful training. To efficiently restore the chest PPG waveform, we propose a style transfer-assisted cycle-consistent generative adversarial network, called starGAN, whose performance is evaluated on a three-channel PPG signal (red, green, and infrared) acquired by a chest-worn multi-modal sensor, called Soundi. Two identical devices are adopted: one sensor collects the PPG signal on the chest, considered low quality and undergoing restoration, while another obtains a high-quality PPG signal measured on the finger, considered the reference signal. Extensive validation over some 8,000 5-second chunks collected from 40 subjects showed about 90% correlation of the restored chest PPG with the reference finger PPG, with a 30% improvement over the raw chest PPG. Likewise, the signal-to-noise ratio improved on average by about 125% over the three channels. The agreement with heart rate computed from concurrent ECG was extremely high, exceeding 84% on average. These results demonstrate effective signal restoration, comparable with findings in recent literature papers. Significance: PPG signals collected from wearable devices are highly susceptible to artifacts, making innovative AI-based techniques fundamental towards holistic health assessments in a single device.
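</p> <p class='mathjax'> The cycle-consistency term at the heart of such models can be shown in miniature, with linear "generators" as hypothetical stand-ins for the chest-to-finger and finger-to-chest mappings; in the real model both are learned networks and the loss is minimized rather than exact. </p> <pre>
# Cycle-consistency sketch: translate chest -> finger -> chest and measure
# how much of the original is preserved. Linear maps are toy stand-ins.
import numpy as np

rng = np.random.default_rng(9)
T = 250                                              # one 5-second chunk at 50 Hz (assumed)
G = rng.normal(scale=0.1, size=(T, T)) + np.eye(T)   # chest -> finger mapping
F = np.linalg.inv(G)                                 # finger -> chest (ideal inverse)

chest = rng.normal(size=T)
finger_hat = G @ chest                               # translate to the clean domain
cycle = F @ finger_hat                               # translate back

cycle_loss = np.abs(chest - cycle).mean()            # ~0 when F inverts G
print(cycle_loss)
</pre> <p class='mathjax'>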
</p> </div> </dd> <dt> <a name='item100'>[100]</a> <a href ="/abs/2503.13497" title="Abstract" id="2503.13497"> arXiv:2503.13497 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13497" title="Download PDF" id="pdf-2503.13497" aria-labelledby="pdf-2503.13497">pdf</a>, <a href="https://arxiv.org/html/2503.13497v1" title="View HTML" id="html-2503.13497" aria-labelledby="html-2503.13497" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13497" title="Other formats" id="oth-2503.13497" aria-labelledby="oth-2503.13497">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Is Limited Participant Diversity Impeding EEG-based Machine Learning? </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Bomatter,+P">Philipp Bomatter</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Gouk,+H">Henry Gouk</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> The application of machine learning (ML) to electroencephalography (EEG) has great potential to advance both neuroscientific research and clinical applications. However, the generalisability and robustness of EEG-based ML models often hinge on the amount and diversity of training data. It is common practice to split EEG recordings into small segments, thereby increasing the number of samples substantially compared to the number of individual recordings or participants. We conceptualise this as a multi-level data generation process and investigate the scaling behaviour of model performance with respect to the overall sample size and the participant diversity through large-scale empirical studies. We then use the same framework to investigate the effectiveness of different ML strategies designed to address limited data problems: data augmentations and self-supervised learning. Our findings show that model performance scaling can be severely constrained by participant distribution shifts and provide actionable guidance for data collection and ML research. 
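</p> <p class='mathjax'> The multi-level point can be made concrete with a split comparison, assuming 20 participants with 50 EEG segments each: a naive segment-level split leaks every participant into both sets, while a participant-level (grouped) split keeps people disjoint, which is where the distribution shift across participants actually shows up. </p> <pre>
# Segment-level vs participant-level splitting; the data sizes are assumed.
import numpy as np
from sklearn.model_selection import GroupShuffleSplit, train_test_split

rng = np.random.default_rng(10)
participants = np.repeat(np.arange(20), 50)      # 20 people, 50 segments each
X = rng.normal(size=(1000, 8))

# Segment-level split: the same person appears in train and test (leaky).
tr, te = train_test_split(np.arange(1000), test_size=0.2, random_state=0)
leak = np.intersect1d(participants[tr], participants[te]).size
print(f"segment split: {leak} participants shared across train/test")

# Participant-level split: disjoint people, an honest estimate of transfer.
gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
tr, te = next(gss.split(X, groups=participants))
leak = np.intersect1d(participants[tr], participants[te]).size
print(f"grouped split: {leak} participants shared across train/test")
</pre> <p class='mathjax'>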
</p> </div> </dd> <dt> <a name='item101'>[101]</a> <a href ="/abs/2503.13502" title="Abstract" id="2503.13502"> arXiv:2503.13502 </a> (cross-list from cs.DB) [<a href="/pdf/2503.13502" title="Download PDF" id="pdf-2503.13502" aria-labelledby="pdf-2503.13502">pdf</a>, <a href="https://arxiv.org/html/2503.13502v1" title="View HTML" id="html-2503.13502" aria-labelledby="html-2503.13502" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13502" title="Other formats" id="oth-2503.13502" aria-labelledby="oth-2503.13502">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Foundation Models for Spatio-Temporal Data Science: A Tutorial and Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+Y">Yuxuan Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+H">Haomin Wen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xia,+Y">Yutong Xia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+M">Ming Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+B">Bin Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Salim,+F">Flora Salim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+Q">Qingsong Wen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+S">Shirui Pan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cong,+G">Gao Cong</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Spatio-Temporal (ST) data science, which includes sensing, managing, and mining large-scale data across space and time, is fundamental to understanding complex systems in domains such as urban computing, climate science, and intelligent transportation. Traditional deep learning approaches have significantly advanced this field, particularly in the stage of ST data mining. However, these models remain task-specific and often require extensive labeled data. Inspired by the success of Foundation Models (FM), especially large language models, researchers have begun exploring the concept of Spatio-Temporal Foundation Models (STFMs) to enhance adaptability and generalization across diverse ST tasks. Unlike prior architectures, STFMs empower the entire workflow of ST data science, ranging from data sensing, management, to mining, thereby offering a more holistic and scalable approach. Despite rapid progress, a systematic study of STFMs for ST data science remains lacking. This survey aims to provide a comprehensive review of STFMs, categorizing existing methodologies and identifying key research directions to advance ST general intelligence. 
</p> </div> </dd> <dt> <a name='item102'>[102]</a> <a href ="/abs/2503.13505" title="Abstract" id="2503.13505"> arXiv:2503.13505 </a> (cross-list from cs.CL) [<a href="/pdf/2503.13505" title="Download PDF" id="pdf-2503.13505" aria-labelledby="pdf-2503.13505">pdf</a>, <a href="https://arxiv.org/html/2503.13505v1" title="View HTML" id="html-2503.13505" aria-labelledby="html-2503.13505" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13505" title="Other formats" id="oth-2503.13505" aria-labelledby="oth-2503.13505">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Ensemble Learning for Large Language Models in Text and Code Generation: A Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ashiga,+M">Mari Ashiga</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jie,+W">Wei Jie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+F">Fan Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Voskanyan,+V">Vardan Voskanyan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dinmohammadi,+F">Fateme Dinmohammadi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Brookes,+P">Paul Brookes</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gong,+J">Jingzhi Gong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zheng Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to IEEE TAI </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Generative pretrained transformers (GPT) are the most common large language models (LLMs) used for generating text from natural language inputs. However, the fixed properties of language parameters in individual LLMs can lead to inconsistencies in the generated outputs. This limitation also restricts the models' ability to represent diverse language patterns due to inherent biases. Moreover, many powerful LLMs are closed-source. This prevents organizations from integrating their data into these systems, raising concerns about data privacy and limiting industry applications. Inspired by the successful application of LLM ensemble models in text generation, recent literature has also investigated their potential in code generation. This article reviews these emerging LLM ensemble approaches. Our goal is to enhance readers' understanding of existing techniques and encourage further research and practical implementation, aiming to expand the real-world applications of LLM ensemble models in both text and code generation. We categorize these approaches into seven main methods: weight merging, knowledge fusion, mixture of experts, reward ensemble, output ensemble, routing, and cascading. From this list, we focus on four methods and models that show strong performance and potential for broader applications. We analyze their modeling steps, training methods, and output features to provide a clear understanding of their capabilities. Our findings highlight the benefits of LLM ensemble techniques. These include better representation of diversity, improved output quality, and greater flexibility in applications.
This information offers valuable insights for selecting models for various real-world tasks involving text and code generation, and potentially applying methods to multimodal LLMs. </p> </div> </dd> <dt> <a name='item103'>[103]</a> <a href ="/abs/2503.13512" title="Abstract" id="2503.13512"> arXiv:2503.13512 </a> (cross-list from stat.ML) [<a href="/pdf/2503.13512" title="Download PDF" id="pdf-2503.13512" aria-labelledby="pdf-2503.13512">pdf</a>, <a href="https://arxiv.org/html/2503.13512v1" title="View HTML" id="html-2503.13512" aria-labelledby="html-2503.13512" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13512" title="Other formats" id="oth-2503.13512" aria-labelledby="oth-2503.13512">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Positivity sets of hinge functions </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Schicho,+J">Josef Schicho</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Tewari,+A+K">Ayush Kumar Tewari</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Warren,+A">Audie Warren</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Discrete Mathematics (cs.DM); Machine Learning (cs.LG); Symbolic Computation (cs.SC); Combinatorics (math.CO); Functional Analysis (math.FA) </div> <p class='mathjax'> In this paper we investigate which subsets of the real plane are realisable as the set of points on which a one-layer ReLU neural network takes a positive value. In the case of cones we give a full characterisation of such sets. Furthermore, we give a necessary condition for any subset of $\mathbb R^d$. We give various examples of such one-layer neural networks. </p> </div> </dd> <dt> <a name='item104'>[104]</a> <a href ="/abs/2503.13520" title="Abstract" id="2503.13520"> arXiv:2503.13520 </a> (cross-list from cs.CL) [<a href="/pdf/2503.13520" title="Download PDF" id="pdf-2503.13520" aria-labelledby="pdf-2503.13520">pdf</a>, <a href="https://arxiv.org/html/2503.13520v1" title="View HTML" id="html-2503.13520" aria-labelledby="html-2503.13520" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13520" title="Other formats" id="oth-2503.13520" aria-labelledby="oth-2503.13520">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Evaluating the Process Modeling Abilities of Large Language Models -- Preliminary Foundations and Results </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Fettke,+P">Peter Fettke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Houy,+C">Constantin Houy</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages, 1 figure, submitted to 20th International Conference on Wirtschaftsinformatik 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Machine Learning (cs.LG); Software Engineering (cs.SE) </div> <p class='mathjax'> Large language models (LLM) have revolutionized the processing of natural language. Although first benchmarks of the process modeling abilities of LLM are promising, it is currently under debate to what extent an LLM can generate good process models. 
In this contribution, we argue that the evaluation of the process modeling abilities of LLMs is far from trivial. Hence, available evaluation results must be interpreted with care. For example, even in a simple scenario, not only the quality of a model should be taken into account, but also the costs and time needed for generation. Thus, an LLM does not generate one optimal solution, but a set of Pareto-optimal variants. Moreover, there are several further challenges which have to be taken into account, e.g., the conceptualization of quality, validation of results, generalizability, and data leakage. We discuss these challenges in detail and outline future experiments to tackle them scientifically. </p> </div> </dd> <dt> <a name='item105'>[105]</a> <a href ="/abs/2503.13522" title="Abstract" id="2503.13522"> arXiv:2503.13522 </a> (cross-list from q-bio.BM) [<a href="/pdf/2503.13522" title="Download PDF" id="pdf-2503.13522" aria-labelledby="pdf-2503.13522">pdf</a>, <a href="https://arxiv.org/html/2503.13522v1" title="View HTML" id="html-2503.13522" aria-labelledby="html-2503.13522" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13522" title="Other formats" id="oth-2503.13522" aria-labelledby="oth-2503.13522">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Advanced Deep Learning Methods for Protein Structure Prediction and Design </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Wu,+W">Weikun Wu</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Wang,+T">Tianyang Wang</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zhang,+Y">Yichao Zhang</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Deng,+N">Ningyuan Deng</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Song,+X">Xinyuan Song</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Bi,+Z">Ziqian Bi</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Yao,+Z">Zheyu Yao</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Chen,+K">Keyu Chen</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Li,+M">Ming Li</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Niu,+Q">Qian Niu</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Liu,+J">Junyu Liu</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Peng,+B">Benji Peng</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zhang,+S">Sen Zhang</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Liu,+M">Ming Liu</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zhang,+L">Li Zhang</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Pan,+X">Xuanhe Pan</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Wang,+J">Jinlang Wang</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Feng,+P">Pohsun Feng</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Wen,+Y">Yizhu Wen</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Yan,+L+K">Lawrence KQ Yan</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Tseng,+H">Hongming Tseng</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zhong,+Y">Yan Zhong</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Wang,+Y">Yunze Wang</a>, <a 
href="https://arxiv.org/search/q-bio?searchtype=author&query=Qin,+Z">Ziyuan Qin</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Jing,+B">Bowen Jing</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Yang,+J">Junjie Yang</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zhou,+J">Jun Zhou</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Liang,+C+X">Chia Xin Liang</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Song,+J">Junhao Song</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Biomolecules (q-bio.BM)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> After AlphaFold won the Nobel Prize, protein prediction with deep learning once again became a hot topic. We comprehensively explore advanced deep learning methods applied to protein structure prediction and design. It begins by examining recent innovations in prediction architectures, with detailed discussions on improvements such as diffusion based frameworks and novel pairwise attention modules. The text analyses key components including structure generation, evaluation metrics, multiple sequence alignment processing, and network architecture, thereby illustrating the current state of the art in computational protein modelling. Subsequent chapters focus on practical applications, presenting case studies that range from individual protein predictions to complex biomolecular interactions. Strategies for enhancing prediction accuracy and integrating deep learning techniques with experimental validation are thoroughly explored. The later sections review the industry landscape of protein design, highlighting the transformative role of artificial intelligence in biotechnology and discussing emerging market trends and future challenges. Supplementary appendices provide essential resources such as databases and open source tools, making this volume a valuable reference for researchers and students. 
</p> </div> </dd> <dt> <a name='item106'>[106]</a> <a href ="/abs/2503.13528" title="Abstract" id="2503.13528"> arXiv:2503.13528 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13528" title="Download PDF" id="pdf-2503.13528" aria-labelledby="pdf-2503.13528">pdf</a>, <a href="https://arxiv.org/html/2503.13528v1" title="View HTML" id="html-2503.13528" aria-labelledby="html-2503.13528" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13528" title="Other formats" id="oth-2503.13528" aria-labelledby="oth-2503.13528">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Internet of Things-Based Smart Precision Farming in Soilless Agriculture: Opportunities and Challenges for Global Food Security </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Dutta,+M">Monica Dutta</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Gupta,+D">Deepali Gupta</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tharewal,+S">Sumegh Tharewal</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Goyal,+D">Deepam Goyal</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Sandhu,+J+K">Jasminder Kaur Sandhu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Kaur,+M">Manjit Kaur</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Alzubi,+A+A">Ahmad Ali Alzubi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Alanazi,+J+M">Jazem Mutared Alanazi</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> IEEE Access ( Volume: 13) 2025, 34238 - 34268 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> The rapid growth of the global population and the continuous decline in cultivable land pose significant threats to food security. This challenge worsens as climate change further reduces the availability of farmland. Soilless agriculture, such as hydroponics, aeroponics, and aquaponics, offers a sustainable solution by enabling efficient crop cultivation in controlled environments. The integration of the Internet of Things (IoT) with smart precision farming improves resource efficiency, automates environmental control, and ensures stable and high-yield crop production. IoT-enabled smart farming systems utilize real-time monitoring, data-driven decision-making, and automation to optimize water and nutrient usage while minimizing human intervention. This paper explores the opportunities and challenges of IoT-based soilless farming, highlighting its role in sustainable agriculture, urban farming, and global food security. These advanced farming methods ensure greater productivity, resource conservation, and year-round cultivation. However, they also face challenges such as high initial investment, technological dependency, and energy consumption. Through a comprehensive study, bibliometric analysis, and comparative analysis, this research highlights current trends and research gaps. It also outlines future directions for researchers, policymakers, and industry stakeholders to drive innovation and scalability in IoT-driven soilless agriculture. 
By emphasizing the benefits of vertical farming and Controlled Environment Agriculture (CEA)-enabled soilless techniques, this paper supports informed decision-making to address food security challenges and promote sustainable agricultural innovations. </p> </div> </dd> <dt> <a name='item107'>[107]</a> <a href ="/abs/2503.13531" title="Abstract" id="2503.13531"> arXiv:2503.13531 </a> (cross-list from cs.CV) [<a href="/pdf/2503.13531" title="Download PDF" id="pdf-2503.13531" aria-labelledby="pdf-2503.13531">pdf</a>, <a href="https://arxiv.org/html/2503.13531v1" title="View HTML" id="html-2503.13531" aria-labelledby="html-2503.13531" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13531" title="Other formats" id="oth-2503.13531" aria-labelledby="oth-2503.13531">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Context-aware Multimodal AI Reveals Hidden Pathways in Five Centuries of Art Evolution </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+J">Jin Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+B">Byunghwee Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=You,+T">Taekho You</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yun,+J">Jinhyuk Yun</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 30 pages, 4 figures. Some example paintings are blurred to avoid potential copyright violations </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Computers and Society (cs.CY); Machine Learning (cs.LG) </div> <p class='mathjax'> The rise of multimodal generative AI is transforming the intersection of technology and art, offering deeper insights into large-scale artwork. Although its creative capabilities have been widely explored, its potential to represent artwork in latent spaces remains underexamined. We use cutting-edge generative AI, specifically Stable Diffusion, to analyze 500 years of Western paintings by extracting two types of latent information with the model: formal aspects (e.g., colors) and contextual aspects (e.g., subject). Our findings reveal that contextual information differentiates between artistic periods, styles, and individual artists more successfully than formal elements. Additionally, using contextual keywords extracted from paintings, we show how artistic expression evolves alongside societal changes. Our generative experiment, infusing prospective contexts into historical artworks, successfully reproduces the evolutionary trajectory of artworks, highlighting the significance of mutual interaction between society and art. This study demonstrates how multimodal AI expands traditional formal analysis by integrating temporal, cultural, and historical contexts. 
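As a rough illustration of the two feature families (not the paper's Stable Diffusion pipeline), the hypothetical sketch below pairs a coarse color histogram (formal aspects) with CLIP zero-shot subject similarity (contextual aspects); the model checkpoint, input file, and keyword list are assumptions. 
<pre>
# Hypothetical sketch: formal (palette) vs. contextual (subject) descriptors.
# CLIP stands in for the paper's Stable-Diffusion-based extraction.
import numpy as np
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def formal_features(img, bins=8):
    # Coarse RGB histogram: a crude proxy for formal aspects such as color.
    arr = np.asarray(img.convert("RGB")).reshape(-1, 3)
    hist, _ = np.histogramdd(arr, bins=(bins,) * 3, range=[(0, 256)] * 3)
    return hist.flatten() / hist.sum()

def contextual_scores(img, subjects):
    # Zero-shot similarity between the painting and candidate subject keywords.
    inputs = processor(text=subjects, images=img, return_tensors="pt", padding=True)
    with torch.no_grad():
        probs = model(**inputs).logits_per_image.softmax(dim=-1)
    return dict(zip(subjects, probs[0].tolist()))

img = Image.open("painting.jpg")  # hypothetical input file
print(contextual_scores(img, ["portrait", "landscape", "religious scene", "still life"]))
</pre>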
</p> </div> </dd> <dt> <a name='item108'>[108]</a> <a href ="/abs/2503.13558" title="Abstract" id="2503.13558"> arXiv:2503.13558 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13558" title="Download PDF" id="pdf-2503.13558" aria-labelledby="pdf-2503.13558">pdf</a>, <a href="https://arxiv.org/html/2503.13558v1" title="View HTML" id="html-2503.13558" aria-labelledby="html-2503.13558" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13558" title="Other formats" id="oth-2503.13558" aria-labelledby="oth-2503.13558">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Survival Analysis with Machine Learning for Predicting Li-ion Battery Remaining Useful Life </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Xue,+J">Jingyuan Xue</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wei,+L">Longfei Wei</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Sheng,+F">Fang Sheng</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Gao,+Y">Yuxin Gao</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhang,+J">Jianfei Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> The accurate prediction of RUL for lithium-ion batteries is crucial for enhancing the reliability and longevity of energy storage systems. Traditional methods for RUL prediction often struggle with issues such as data sparsity, varying battery chemistries, and the inability to capture complex degradation patterns over time. In this study, we propose a survival analysis-based framework combined with deep learning models to predict the RUL of lithium-ion batteries. Specifically, we utilize five advanced models: the Cox-type models (Cox, CoxPH, and CoxTime) and two machine-learning-based models (DeepHit and MTLR). These models address the challenges of accurate RUL estimation by transforming raw time-series battery data into survival data, including key degradation indicators such as voltage, current, and internal resistance. Advanced feature extraction techniques enhance the model's robustness in diverse real-world scenarios, including varying charging conditions and battery chemistries. Our models are tested using 10-fold cross-validation, ensuring generalizability and minimizing overfitting. Experimental results show that our survival-based framework significantly improves RUL prediction accuracy compared to traditional methods, providing a reliable tool for battery management and maintenance optimization. This study contributes to the advancement of predictive maintenance in battery technology, offering valuable insights for both researchers and industry practitioners aiming to enhance the operational lifespan of lithium-ion batteries. 
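A minimal sketch of the survival framing with one of the Cox-type models named above (CoxPH, via the lifelines library); the synthetic degradation columns and the end-of-life threshold are assumptions, not the paper's data. 
<pre>
# Survival framing sketch: cells live for some number of cycles; cells that
# have not failed by the end of the test are right-censored (event = 0).
import numpy as np
import pandas as pd
from lifelines import CoxPHFitter

rng = np.random.default_rng(0)
n = 200
df = pd.DataFrame({
    "voltage_fade": rng.normal(size=n),        # assumed degradation indicators
    "internal_resistance": rng.normal(size=n),
})
risk = 0.8 * df["voltage_fade"] + 0.5 * df["internal_resistance"]
df["cycles"] = rng.exponential(np.exp(-risk)) * 500  # cycles to end-of-life
df["event"] = df["cycles"].lt(450).astype(int)       # 0 marks censored cells
df["cycles"] = df["cycles"].clip(upper=450)          # censoring stops the clock

cph = CoxPHFitter()
cph.fit(df, duration_col="cycles", event_col="event")
# Median predicted remaining cycles for the first few cells:
print(cph.predict_median(df[["voltage_fade", "internal_resistance"]].head()))
</pre>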
</p> </div> </dd> <dt> <a name='item109'>[109]</a> <a href ="/abs/2503.13562" title="Abstract" id="2503.13562"> arXiv:2503.13562 </a> (cross-list from stat.ML) [<a href="/pdf/2503.13562" title="Download PDF" id="pdf-2503.13562" aria-labelledby="pdf-2503.13562">pdf</a>, <a href="https://arxiv.org/html/2503.13562v1" title="View HTML" id="html-2503.13562" aria-labelledby="html-2503.13562" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13562" title="Other formats" id="oth-2503.13562" aria-labelledby="oth-2503.13562">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Micro Text Classification Based on Balanced Positive-Unlabeled Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Jia,+L">Lin-Han Jia</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Guo,+L">Lan-Zhe Guo</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Zhou,+Z">Zhi Zhou</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Han,+S">Si-Ye Han</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Li,+Z">Zi-Wen Li</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Li,+Y">Yu-Feng Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> In real-world text classification tasks, negative texts often contain a minimal proportion of negative content, which is especially problematic in areas like text quality control, legal risk screening, and sensitive information interception. This challenge manifests at two levels: at the macro level, distinguishing negative texts is difficult due to the high similarity between coarse-grained positive and negative samples; at the micro level, the issue stems from extreme class imbalance and a lack of fine-grained labels. To address these challenges, we propose transforming the coarse-grained positive-negative (PN) classification task into an imbalanced fine-grained positive-unlabeled (PU) classification problem, supported by theoretical analysis. We introduce a novel framework, Balanced Fine-Grained Positive-Unlabeled (BFGPU) learning, which features a unique PU learning loss function that optimizes macro-level performance amidst severe imbalance at the micro level. The framework's performance is further boosted by rebalanced pseudo-labeling and threshold adjustment. Extensive experiments on both public and real-world datasets demonstrate the effectiveness of BFGPU, which outperforms other methods, even in extreme scenarios where both macro and micro levels are highly imbalanced. 
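The abstract does not spell out the BFGPU loss itself; as background, the sketch below implements the standard non-negative PU risk estimator (Kiryo et al., 2017) that imbalanced PU methods of this kind build on, with the positive-class prior pi_p assumed known. 
<pre>
# Generic non-negative PU risk (not BFGPU's exact loss): positives are labeled,
# the unlabeled pool mixes positives and negatives with class prior pi_p.
import torch
import torch.nn.functional as F

def nn_pu_loss(scores_p, scores_u, pi_p):
    loss_pos = F.softplus(-scores_p).mean()        # positives scored positive
    loss_pos_as_neg = F.softplus(scores_p).mean()  # positives scored negative
    loss_unl_as_neg = F.softplus(scores_u).mean()  # unlabeled scored negative
    # Negative-class risk, estimated from unlabeled data minus the positive part.
    neg_risk = loss_unl_as_neg - pi_p * loss_pos_as_neg
    # Clamping at zero keeps the estimator non-negative (the "nn" in nnPU).
    return pi_p * loss_pos + torch.clamp(neg_risk, min=0.0)

loss = nn_pu_loss(torch.randn(32), torch.randn(256), pi_p=0.05)
</pre>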
</p> </div> </dd> <dt> <a name='item110'>[110]</a> <a href ="/abs/2503.13565" title="Abstract" id="2503.13565"> arXiv:2503.13565 </a> (cross-list from cs.CL) [<a href="/pdf/2503.13565" title="Download PDF" id="pdf-2503.13565" aria-labelledby="pdf-2503.13565">pdf</a>, <a href="https://arxiv.org/html/2503.13565v1" title="View HTML" id="html-2503.13565" aria-labelledby="html-2503.13565" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13565" title="Other formats" id="oth-2503.13565" aria-labelledby="oth-2503.13565">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ML-SpecQD: Multi-Level Speculative Decoding with Quantized Drafts </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Georganas,+E">Evangelos Georganas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kalamkar,+D">Dhiraj Kalamkar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kozlov,+A">Alexander Kozlov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Heinecke,+A">Alexander Heinecke</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Speculative decoding (SD) has emerged as a method to accelerate LLM inference without sacrificing any accuracy over the 16-bit model inference. In a typical SD setup, the idea is to use a full-precision, small, fast model as "draft" to generate the next few tokens and use the "target" large model to verify the draft-generated tokens. The efficacy of this method heavily relies on the acceptance ratio of the draft-generated tokens and the relative token throughput of the draft versus the target model. Nevertheless, an efficient SD pipeline requires pre-training and aligning the draft model to the target model, making it impractical for LLM inference in a plug-and-play fashion. In this work, we propose using MXFP4 models as drafts in a plug-and-play fashion since the MXFP4 Weight-Only-Quantization (WOQ) merely direct-casts the BF16 target model weights to MXFP4. In practice, our plug-and-play solution gives speedups up to 2x over the BF16 baseline. Then we pursue an opportunity for further acceleration: the MXFP4 draft token generation itself can be accelerated via speculative decoding by using yet another smaller draft. We call our method ML-SpecQD: Multi-Level Speculative Decoding with Quantized Drafts since it recursively applies speculation for accelerating the draft-token generation. Combining Multi-Level Speculative Decoding with MXFP4 Quantized Drafts we outperform state-of-the-art speculative decoding, yielding speedups up to 2.72x over the BF16 baseline. 
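A minimal single-round sketch of the draft/verify loop with greedy verification; draft and target are stand-in callables returning next-token logits (in the plug-and-play setup the draft would be the MXFP4 direct-cast of the BF16 target), and ML-SpecQD applies the same speculation recursively to accelerate the draft itself. 
<pre>
# One round of speculative decoding, greedy-verification variant.
import torch

def speculate(target, draft, tokens, k=4):
    # 1) The cheap draft proposes k tokens autoregressively.
    proposal = tokens
    for _ in range(k):
        nxt = draft(proposal).argmax(-1, keepdim=True)
        proposal = torch.cat([proposal, nxt], dim=-1)
    # 2) The target checks the proposals (conceptually one batched pass;
    #    written as a loop here) and keeps the longest agreeing prefix.
    accepted = tokens
    for i in range(tokens.shape[-1], proposal.shape[-1]):
        want = target(proposal[..., :i]).argmax(-1, keepdim=True)
        if not torch.equal(want, proposal[..., i:i + 1]):
            return torch.cat([accepted, want], dim=-1)  # target's correction
        accepted = proposal[..., :i + 1]
    return accepted

def toy_model(seq):  # deterministic stand-in: next token = last token + 1
    return torch.nn.functional.one_hot((seq[:, -1] + 1) % 50, 50).float()

print(speculate(toy_model, toy_model, torch.tensor([[3]])))  # [[3, 4, 5, 6, 7]]
</pre>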
</p> </div> </dd> <dt> <a name='item111'>[111]</a> <a href ="/abs/2503.13566" title="Abstract" id="2503.13566"> arXiv:2503.13566 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13566" title="Download PDF" id="pdf-2503.13566" aria-labelledby="pdf-2503.13566">pdf</a>, <a href="/format/2503.13566" title="Other formats" id="oth-2503.13566" aria-labelledby="oth-2503.13566">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Classification of power quality events in the transmission grid: comparative evaluation of different machine learning models </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=G%C3%BCvengir,+U">Umut Güvengir</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=K%C3%BC%C3%A7%C3%BCk,+D">Dilek Küçük</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Buhan,+S">Serkan Buhan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Manta%C5%9F,+C+A">Cuma Ali Mantaş</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yeniceli,+M">Murathan Yeniceli</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Presented at CIGRE SEERC 2023 Conference </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Automatic classification of electric power quality events with respect to their root causes is critical for electrical grid management. In this paper, we present comparative evaluation results of an extensive set of machine learning models for the classification of power quality events, based on their root causes. After extensive experiments using different machine learning libraries, it is observed that the best performing learning models turn out to be Cubic SVM and XGBoost. During error analysis, it is observed that the main source of performance degradation for both models is the classification of ABC faults as ABCG faults, or vice versa. Ultimately, the models achieving the best results will be integrated into the event classification module of a large-scale power quality and grid monitoring system for the Turkish electricity transmission system. 
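A minimal sketch of the two best-performing models named above, a cubic (degree-3 polynomial kernel) SVM and XGBoost, on hypothetical event features; the feature layout and class set are assumed, not the paper's dataset. 
<pre>
# Cubic SVM vs. XGBoost on synthetic stand-in features for power quality events.
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 12))   # e.g., per-phase RMS and harmonic features
y = rng.integers(0, 5, 1000)      # root-cause classes (ABC, ABCG, ...)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)

cubic_svm = make_pipeline(StandardScaler(), SVC(kernel="poly", degree=3))
xgb = XGBClassifier(n_estimators=300, max_depth=6, learning_rate=0.1)
for name, clf in [("cubic SVM", cubic_svm), ("XGBoost", xgb)]:
    clf.fit(X_tr, y_tr)
    print(name, clf.score(X_te, y_te))
# Error analysis as in the paper: the confusion matrix exposes, e.g.,
# ABC faults classified as ABCG faults or vice versa.
print(confusion_matrix(y_te, xgb.predict(X_te)))
</pre>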
</p> </div> </dd> <dt> <a name='item112'>[112]</a> <a href ="/abs/2503.13568" title="Abstract" id="2503.13568"> arXiv:2503.13568 </a> (cross-list from cs.RO) [<a href="/pdf/2503.13568" title="Download PDF" id="pdf-2503.13568" aria-labelledby="pdf-2503.13568">pdf</a>, <a href="https://arxiv.org/html/2503.13568v1" title="View HTML" id="html-2503.13568" aria-labelledby="html-2503.13568" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13568" title="Other formats" id="oth-2503.13568" aria-labelledby="oth-2503.13568">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> WMINet: A Wheel-Mounted Inertial Learning Approach For Mobile-Robot Positioning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Versano,+G">Gal Versano</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Klein,+I">Itzik Klein</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Autonomous mobile robots are widely used for navigation, transportation, and inspection tasks indoors and outdoors. In practical situations of limited satellite signals or poor lighting conditions, navigation depends only on inertial sensors. In such cases, the navigation solution rapidly drifts due to inertial measurement errors. In this work, we propose WMINet, a wheel-mounted inertial deep learning approach to estimate the mobile robot's position based only on its inertial sensors. To that end, we merge two common practical methods to reduce inertial drift: a wheel-mounted approach and driving the mobile robot in periodic trajectories. Additionally, we enforce a wheelbase constraint to further improve positioning performance. To evaluate our proposed approach, we recorded a wheel-mounted inertial dataset totaling 190 minutes using the Rosbot-XL; the dataset is made publicly available. Our approach demonstrated a 66% improvement over state-of-the-art approaches. As a consequence, our approach enables navigation in challenging environments and bridges the pure inertial gap. This enables seamless robot navigation using only inertial sensors for short periods. 
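As a hypothetical stand-in for such a network (the actual WMINet architecture is given in the paper), the sketch below shows the input/output shape: a small 1D CNN regressing a window of six-channel wheel-mounted IMU samples to a planar position change. 
<pre>
# Tiny 1D CNN: (batch, 6 IMU channels, window) -> (batch, 2) position delta.
import torch
import torch.nn as nn

class TinyInertialNet(nn.Module):
    def __init__(self, channels=6):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv1d(channels, 32, kernel_size=7, padding=3), nn.ReLU(),
            nn.Conv1d(32, 64, kernel_size=7, padding=3), nn.ReLU(),
            nn.AdaptiveAvgPool1d(1),
        )
        self.head = nn.Linear(64, 2)  # delta-x, delta-y over the window

    def forward(self, x):
        return self.head(self.features(x).squeeze(-1))

imu = torch.randn(8, 6, 200)          # 8 windows of 200 IMU samples each
print(TinyInertialNet()(imu).shape)   # torch.Size([8, 2])
</pre>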
</p> </div> </dd> <dt> <a name='item113'>[113]</a> <a href ="/abs/2503.13572" title="Abstract" id="2503.13572"> arXiv:2503.13572 </a> (cross-list from cs.AR) [<a href="/pdf/2503.13572" title="Download PDF" id="pdf-2503.13572" aria-labelledby="pdf-2503.13572">pdf</a>, <a href="/format/2503.13572" title="Other formats" id="oth-2503.13572" aria-labelledby="oth-2503.13572">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> VeriContaminated: Assessing LLM-Driven Verilog Coding for Data Contamination </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Z">Zeng Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shao,+M">Minghao Shao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bhandari,+J">Jitendra Bhandari</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mankali,+L">Likhitha Mankali</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Karri,+R">Ramesh Karri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sinanoglu,+O">Ozgur Sinanoglu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shafique,+M">Muhammad Shafique</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Knechtel,+J">Johann Knechtel</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Hardware Architecture (cs.AR)</span>; Cryptography and Security (cs.CR); Machine Learning (cs.LG) </div> <p class='mathjax'> Large Language Models (LLMs) have revolutionized code generation, achieving exceptional results on various established benchmarking frameworks. However, concerns about data contamination - where benchmark data inadvertently leaks into pre-training or fine-tuning datasets - raise questions about the validity of these evaluations. While this issue is known to limit the industrial adoption of LLM-driven software engineering, hardware coding has received little to no attention regarding these risks. For the first time, we analyze state-of-the-art (SOTA) evaluation frameworks for Verilog code generation (VerilogEval and RTLLM), using established methods for contamination detection (CCD and Min-K% Prob). We cover SOTA commercial and open-source LLMs (CodeGen2.5, Minitron 4b, Mistral 7b, phi-4 mini, LLaMA-{1,2,3.1}, GPT-{2,3.5,4o}, Deepseek-Coder, and CodeQwen 1.5), in baseline and fine-tuned models (RTLCoder and Verigen). Our study confirms that data contamination is a critical concern. We explore mitigations and the resulting trade-offs for code quality vs fairness (i.e., reducing contamination toward unbiased benchmarking). </p> </div> </dd> <dt> <a name='item114'>[114]</a> <a href ="/abs/2503.13573" title="Abstract" id="2503.13573"> arXiv:2503.13573 </a> (cross-list from cs.RO) [<a href="/pdf/2503.13573" title="Download PDF" id="pdf-2503.13573" aria-labelledby="pdf-2503.13573">pdf</a>, <a href="/format/2503.13573" title="Other formats" id="oth-2503.13573" aria-labelledby="oth-2503.13573">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Online Signature Verification based on the Lagrange formulation with 2D and 3D robotic models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Diaz,+M">Moises Diaz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ferrer,+M+A">Miguel A. 
Ferrer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gil,+J+M">Juan M. Gil</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rodriguez,+R">Rafael Rodriguez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+P">Peirong Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+L">Lianwen Jin</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> ScienceDirect, March 17, 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Online Signature Verification commonly relies on function-based features, such as time-sampled horizontal and vertical coordinates, as well as the pressure exerted by the writer, obtained through a digitizer. Although inferring additional information about the writer's arm pose, kinematics, and dynamics based on digitizer data can be useful, it constitutes a challenge. In this paper, we tackle this challenge by proposing a new set of features based on the dynamics of online signatures. These new features are inferred through a Lagrangian formulation, obtaining the sequences of generalized coordinates and torques for 2D and 3D robotic arm models. By combining kinematic and dynamic robotic features, our results demonstrate their significant effectiveness for online automatic signature verification, achieving state-of-the-art results when integrated into deep learning models. </p> </div> </dd> <dt> <a name='item115'>[115]</a> <a href ="/abs/2503.13577" title="Abstract" id="2503.13577"> arXiv:2503.13577 </a> (cross-list from cs.MA) [<a href="/pdf/2503.13577" title="Download PDF" id="pdf-2503.13577" aria-labelledby="pdf-2503.13577">pdf</a>, <a href="https://arxiv.org/html/2503.13577v1" title="View HTML" id="html-2503.13577" aria-labelledby="html-2503.13577" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13577" title="Other formats" id="oth-2503.13577" aria-labelledby="oth-2503.13577">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> When Should We Orchestrate Multiple Agents? </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bhatt,+U">Umang Bhatt</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kapoor,+S">Sanyam Kapoor</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Upadhyay,+M">Mihir Upadhyay</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sucholutsky,+I">Ilia Sucholutsky</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Quinzan,+F">Francesco Quinzan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Collins,+K+M">Katherine M. 
Collins</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Weller,+A">Adrian Weller</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wilson,+A+G">Andrew Gordon Wilson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zafar,+M+B">Muhammad Bilal Zafar</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Multiagent Systems (cs.MA)</span>; Computers and Society (cs.CY); Machine Learning (cs.LG) </div> <p class='mathjax'> Strategies for orchestrating the interactions between multiple agents, both human and artificial, can wildly overestimate performance and underestimate the cost of orchestration. We design a framework to orchestrate agents under realistic conditions, such as inference costs or availability constraints. We show theoretically that orchestration is only effective if there are performance or cost differentials between agents. We then empirically demonstrate how orchestration between multiple agents can be helpful for selecting agents in a simulated environment, picking a learning strategy in the infamous Rogers' Paradox from social science, and outsourcing tasks to other agents during a question-answer task in a user study. </p> </div> </dd> <dt> <a name='item116'>[116]</a> <a href ="/abs/2503.13578" title="Abstract" id="2503.13578"> arXiv:2503.13578 </a> (cross-list from eess.SP) [<a href="/pdf/2503.13578" title="Download PDF" id="pdf-2503.13578" aria-labelledby="pdf-2503.13578">pdf</a>, <a href="https://arxiv.org/html/2503.13578v1" title="View HTML" id="html-2503.13578" aria-labelledby="html-2503.13578" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13578" title="Other formats" id="oth-2503.13578" aria-labelledby="oth-2503.13578">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Convolutional neural network for early detection of lameness and irregularity in horses using an IMU sensor </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Savoini,+B">Benoît Savoini</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Bertolaccini,+J">Jonathan Bertolaccini</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Montavon,+S">Stéphane Montavon</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Deriaz,+M">Michel Deriaz</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at AMLDS 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Lameness and gait irregularities are significant concerns in equine health management, affecting performance, welfare, and economic value. Traditional observational methods rely on subjective expert assessments, which can lead to inconsistencies in detecting subtle or early-stage lameness. While AI-based approaches have emerged, many require multiple sensors, force plates, or video systems, making them costly and impractical for field deployment. In this applied research study, we present a stride-level classification system that utilizes a single inertial measurement unit (IMU) and a one-dimensional convolutional neural network (1D CNN) to objectively differentiate between sound and lame horses, with a primary focus on the trot gait. 
The proposed system was tested under real-world conditions, achieving a 90% session-level accuracy with no false positives, demonstrating its robustness for practical applications. By employing a single, non-intrusive, and readily available sensor, our approach significantly reduces the complexity and cost of hardware requirements while maintaining high classification performance. These results highlight the potential of our CNN-based method as a field-tested, scalable solution for automated lameness detection. By enabling early diagnosis, this system offers a valuable tool for preventing minor gait irregularities from developing into severe conditions, ultimately contributing to improved equine welfare and performance in veterinary and equestrian practice. </p> </div> </dd> <dt> <a name='item117'>[117]</a> <a href ="/abs/2503.13579" title="Abstract" id="2503.13579"> arXiv:2503.13579 </a> (cross-list from cs.GR) [<a href="/pdf/2503.13579" title="Download PDF" id="pdf-2503.13579" aria-labelledby="pdf-2503.13579">pdf</a>, <a href="https://arxiv.org/html/2503.13579v1" title="View HTML" id="html-2503.13579" aria-labelledby="html-2503.13579" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13579" title="Other formats" id="oth-2503.13579" aria-labelledby="oth-2503.13579">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ASMR: Adaptive Skeleton-Mesh Rigging and Skinning via 2D Generative Prior </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hong,+S">Seokhyeon Hong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Choi,+S">Soojin Choi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+C">Chaelin Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cha,+S">Sihun Cha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Noh,+J">Junyong Noh</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Eurographics 2025; Project Page <a href="https://seokhyeonhong.github.io/projects/asmr/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Graphics (cs.GR)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Despite the growing accessibility of skeletal motion data, integrating it for animating character meshes remains challenging due to diverse configurations of both skeletons and meshes. Specifically, the body scale and bone lengths of the skeleton should be adjusted in accordance with the size and proportions of the mesh, ensuring that all joints are accurately positioned within the character mesh. Furthermore, defining skinning weights is complicated by variations in skeletal configurations, such as the number of joints and their hierarchy, as well as differences in mesh configurations, including their connectivity and shapes. While existing approaches have made efforts to automate this process, they hardly address the variations in both skeletal and mesh configurations. In this paper, we present a novel method for the automatic rigging and skinning of character meshes using skeletal motion data, accommodating arbitrary configurations of both meshes and skeletons. 
The proposed method predicts the optimal skeleton aligned with the size and proportion of the mesh and defines skinning weights for various mesh-skeleton configurations, without requiring explicit supervision tailored to each of them. By incorporating Diffusion 3D Features (Diff3F) as semantic descriptors of character meshes, our method achieves robust generalization across different configurations. To assess the performance of our method in comparison to existing approaches, we conducted comprehensive evaluations encompassing both quantitative and qualitative analyses, specifically examining the predicted skeletons, skinning weights, and deformation quality. </p> </div> </dd> <dt> <a name='item118'>[118]</a> <a href ="/abs/2503.13644" title="Abstract" id="2503.13644"> arXiv:2503.13644 </a> (cross-list from quant-ph) [<a href="/pdf/2503.13644" title="Download PDF" id="pdf-2503.13644" aria-labelledby="pdf-2503.13644">pdf</a>, <a href="/format/2503.13644" title="Other formats" id="oth-2503.13644" aria-labelledby="oth-2503.13644">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Quantum EigenGame for excited state calculation </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Quiroga,+D">David Quiroga</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Han,+J">Jason Han</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Kyrillidis,+A">Anastasios Kyrillidis</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at CPAL 2025, 28 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Data Structures and Algorithms (cs.DS); Machine Learning (cs.LG); Optimization and Control (math.OC) </div> <p class='mathjax'> Computing the excited states of a given Hamiltonian is computationally hard for large systems, but methods that do so using quantum computers scale tractably. This problem is equivalent to the PCA problem where we are interested in decomposing a matrix into a collection of principal components. Classically, PCA is a well-studied problem setting, for which both centralized and distributed approaches have been developed. On the distributed side, one recent approach is that of EigenGame, a game-theoretic approach to finding eigenvectors where each eigenvector reaches a Nash equilibrium either sequentially or in parallel. With this work, we extend the EigenGame algorithm both to a $0^\text{th}$-order approach and to quantum computers, and harness the framework that quantum computing provides in computing excited states. Results show that using the Quantum EigenGame allows us to converge to excited states of a given Hamiltonian without the need of a deflation step. We also develop theory on error accumulation for finite-differences and parameterized approaches. 
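For reference, a plain-numpy sketch of the classical EigenGame that the paper extends: each 'player' ascends its utility and settles on the next eigenvector without an explicit deflation step, the property the quantum variant inherits; the learning rate and step count are arbitrary choices. 
<pre>
# Sequential EigenGame sketch: top-k eigenvectors as a game, no deflation.
import numpy as np

def eigengame(M, k=3, steps=3000, lr=0.01):
    V = []
    for i in range(k):
        v = np.random.default_rng(i).normal(size=M.shape[0])
        v /= np.linalg.norm(v)
        for _ in range(steps):
            grad = 2 * M @ v
            for u in V:  # penalties push player i away from earlier winners
                grad -= 2 * (v @ M @ u) / (u @ M @ u) * (M @ u)
            grad -= (grad @ v) * v  # project onto the unit sphere's tangent
            v = v + lr * grad
            v /= np.linalg.norm(v)
        V.append(v)
    return np.stack(V)

A = np.random.default_rng(0).normal(size=(5, 5))
M = A @ A.T  # symmetric stand-in; for a Hamiltonian's excited states, use a shifted -H
V = eigengame(M)
print(np.round(V @ M @ V.T, 2))  # approximately diag(top-3 eigenvalues)
</pre>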
</p> </div> </dd> <dt> <a name='item119'>[119]</a> <a href ="/abs/2503.13647" title="Abstract" id="2503.13647"> arXiv:2503.13647 </a> (cross-list from quant-ph) [<a href="/pdf/2503.13647" title="Download PDF" id="pdf-2503.13647" aria-labelledby="pdf-2503.13647">pdf</a>, <a href="https://arxiv.org/html/2503.13647v1" title="View HTML" id="html-2503.13647" aria-labelledby="html-2503.13647" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13647" title="Other formats" id="oth-2503.13647" aria-labelledby="oth-2503.13647">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SRBB-Based Quantum State Preparation </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Belli,+G">Giacomo Belli</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Mordacci,+M">Marco Mordacci</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Amoretti,+M">Michele Amoretti</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages, 8 figures, 6 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> In this work, a scalable algorithm for the approximate quantum state preparation problem is proposed, facing a challenge of fundamental importance in many topic areas of quantum computing. The algorithm uses a variational quantum circuit based on the Standard Recursive Block Basis (SRBB), a hierarchical construction for the matrix algebra of the $SU(2^n)$ group, which is capable of linking the variational parameters with the topology of the Lie group. Compared to the full algebra, using only diagonal components reduces the number of CNOTs by an exponential factor, as well as the circuit depth, in full agreement with the relaxation principle, inherent to the approximation methodology, of minimizing resources while achieving high accuracy. The desired quantum state is then approximated by a scalable quantum neural network, which is designed upon the diagonal SRBB sub-algebra. This approach provides a new scheme for approximate quantum state preparation in a variational framework and a specific use case for the SRBB hierarchy. The performance of the algorithm is assessed with different loss functions, like fidelity, trace distance, and Frobenius norm, in relation to two optimizers: Adam and Nelder-Mead. The results highlight the potential of SRBB in close connection with the geometry of unitary groups, achieving high accuracy up to 4 qubits in simulation, but also its current limitations with an increasing number of qubits. Additionally, the approximate SRBB-based QSP algorithm has been tested on real quantum devices to assess its performance with a small number of qubits. 
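A generic variational state-preparation sketch (a two-qubit RY/CNOT ansatz, not the SRBB construction) showing the infidelity-loss-plus-Nelder-Mead pairing evaluated in the paper: 
<pre>
# Variational state preparation: minimize infidelity with Nelder-Mead.
import numpy as np
from scipy.optimize import minimize

def ry(t):  # single-qubit rotation about Y
    return np.array([[np.cos(t / 2), -np.sin(t / 2)],
                     [np.sin(t / 2),  np.cos(t / 2)]])

CNOT = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, 1], [0, 0, 1, 0]], float)

def ansatz(theta):
    psi = np.zeros(4)
    psi[0] = 1.0                                     # start in |00>
    psi = np.kron(ry(theta[0]), ry(theta[1])) @ psi  # RY layer
    psi = CNOT @ psi                                 # entangling gate
    return np.kron(ry(theta[2]), ry(theta[3])) @ psi

target = np.array([1, 0, 0, 1]) / np.sqrt(2)         # Bell state

def infidelity(theta):
    return 1 - abs(target @ ansatz(theta)) ** 2

res = minimize(infidelity, x0=np.full(4, 0.1), method="Nelder-Mead")
print(res.fun)  # close to 0: the target state has been prepared
</pre>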
</p> </div> </dd> <dt> <a name='item120'>[120]</a> <a href ="/abs/2503.13676" title="Abstract" id="2503.13676"> arXiv:2503.13676 </a> (cross-list from stat.ML) [<a href="/pdf/2503.13676" title="Download PDF" id="pdf-2503.13676" aria-labelledby="pdf-2503.13676">pdf</a>, <a href="https://arxiv.org/html/2503.13676v1" title="View HTML" id="html-2503.13676" aria-labelledby="html-2503.13676" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13676" title="Other formats" id="oth-2503.13676" aria-labelledby="oth-2503.13676">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Bayesian Kernel Regression for Functional Data </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Kusaba,+M">Minoru Kusaba</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Iwayama,+M">Megumi Iwayama</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Yoshida,+R">Ryo Yoshida</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> In supervised learning, the output variable to be predicted is often represented as a function, such as a spectrum or probability distribution. Despite its importance, functional output regression remains relatively unexplored. In this study, we propose a novel functional output regression model based on kernel methods. Unlike conventional approaches that independently train regressors with scalar outputs for each measurement point of the output function, our method leverages the covariance structure within the function values, akin to multitask learning, leading to enhanced learning efficiency and improved prediction accuracy. Compared with existing nonlinear function-on-scalar models in statistical functional data analysis, our model effectively handles high-dimensional nonlinearity while maintaining a simple model structure. Furthermore, the fully kernel-based formulation allows the model to be expressed within the framework of reproducing kernel Hilbert space (RKHS), providing an analytic form for parameter estimation and a solid foundation for further theoretical analysis. The proposed model delivers a functional output predictive distribution derived analytically from a Bayesian perspective, enabling the quantification of uncertainty in the predicted function. We demonstrate the model's enhanced prediction performance through experiments on artificial datasets and density of states prediction tasks in materials science. 
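A minimal numpy sketch of the idea: with a kernel over inputs and output functions observed on a grid, the Bayesian predictive mean for a whole output curve and its input-side variance follow in closed form; the data, kernel, and noise level are illustrative. 
<pre>
# GP-style kernel regression with a functional (curve-on-a-grid) output.
import numpy as np

def rbf(A, B, ls=1.0):
    d = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * d / ls ** 2)

rng = np.random.default_rng(0)
X = rng.uniform(-2, 2, size=(30, 1))          # scalar inputs
grid = np.linspace(0, 1, 50)                  # domain of the output function
Y = np.sin(X + grid[None, :]) + 0.05 * rng.normal(size=(30, 50))

K = rbf(X, X) + 1e-2 * np.eye(30)             # input kernel + noise term
alpha = np.linalg.solve(K, Y)                 # solves for all 50 curves at once

Xs = np.array([[0.3]])
ks = rbf(Xs, X)
mean = ks @ alpha                                  # predicted output curve
var = rbf(Xs, Xs) - ks @ np.linalg.solve(K, ks.T)  # input-side uncertainty
# A full model also couples grid points with an output covariance (matrix-normal).
print(mean.shape, var.item())
</pre>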
</p> </div> </dd> <dt> <a name='item121'>[121]</a> <a href ="/abs/2503.13679" title="Abstract" id="2503.13679"> arXiv:2503.13679 </a> (cross-list from cs.PF) [<a href="/pdf/2503.13679" title="Download PDF" id="pdf-2503.13679" aria-labelledby="pdf-2503.13679">pdf</a>, <a href="https://arxiv.org/html/2503.13679v1" title="View HTML" id="html-2503.13679" aria-labelledby="html-2503.13679" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13679" title="Other formats" id="oth-2503.13679" aria-labelledby="oth-2503.13679">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PrETi: Predicting Execution Time in Early Stage with LLVM and Machine Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+R">Risheng Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sieweck,+P">Philipp Sieweck</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=von+Hasseln,+H">Hermann von Hasseln</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nowotka,+D">Dirk Nowotka</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Performance (cs.PF)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> We introduce preti, a novel framework for predicting software execution time during the early stages of development. preti leverages an LLVM-based simulation environment to extract timing-related runtime information, such as the count of executed LLVM IR instructions. This information, combined with historical execution time data, is utilized to train machine learning models for accurate time prediction. To further enhance prediction accuracy, our approach incorporates simulations of cache accesses and branch prediction. The evaluations on public benchmarks demonstrate that preti achieves an average Absolute Percentage Error (APE) of 11.98%, surpassing state-of-the-art methods. These results underscore the effectiveness and efficiency of preti as a robust solution for early-stage timing analysis. 
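A minimal sketch of the prediction stage under assumed features: counts of executed LLVM IR instructions by kind (plus simulated cache/branch counters) regressed onto measured execution time, scored with the APE-style metric the paper reports. 
<pre>
# Regress execution time from simulated IR-level counters (names illustrative).
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import cross_val_score

rng = np.random.default_rng(0)
# Columns: executed IR ops by kind + cache misses + branch mispredictions.
X = rng.poisson(lam=[500, 200, 80, 40, 10], size=(300, 5)).astype(float)
cycles_per_op = np.array([1.0, 2.0, 4.0, 30.0, 15.0])   # assumed cost weights
y = (X @ cycles_per_op) * (1 + 0.05 * rng.normal(size=300))

model = GradientBoostingRegressor()
ape = -cross_val_score(model, X, y, cv=5,
                       scoring="neg_mean_absolute_percentage_error")
print(ape.mean())  # mean absolute percentage error across folds
</pre>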
</p> </div> </dd> <dt> <a name='item122'>[122]</a> <a href ="/abs/2503.13690" title="Abstract" id="2503.13690"> arXiv:2503.13690 </a> (cross-list from cs.CL) [<a href="/pdf/2503.13690" title="Download PDF" id="pdf-2503.13690" aria-labelledby="pdf-2503.13690">pdf</a>, <a href="https://arxiv.org/html/2503.13690v1" title="View HTML" id="html-2503.13690" aria-labelledby="html-2503.13690" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13690" title="Other formats" id="oth-2503.13690" aria-labelledby="oth-2503.13690">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Atyaephyra at SemEval-2025 Task 4: Low-Rank NPO </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bronec,+J">Jan Bronec</a> (1), <a href="https://arxiv.org/search/cs?searchtype=author&query=Helcl,+J">Jindřich Helcl</a> (1) ((1) Charles University, Faculty of Mathematics and Physics, Institute of Formal and Applied Linguistics)</div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5 pages, 1 figure, 1 table, submitted to SemEval proceedings for ACL Anthology </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> We present a submission to the SemEval 2025 shared task on unlearning sensitive content from LLMs. Our approach employs negative preference optimization using low-rank adaptation. We show that we can utilize this combination to cheaply compute additional regularization terms, which help with unlearning stabilization. The results of our approach significantly exceed the shared task baselines. </p> </div> </dd> <dt> <a name='item123'>[123]</a> <a href ="/abs/2503.13751" title="Abstract" id="2503.13751"> arXiv:2503.13751 </a> (cross-list from stat.ML) [<a href="/pdf/2503.13751" title="Download PDF" id="pdf-2503.13751" aria-labelledby="pdf-2503.13751">pdf</a>, <a href="/format/2503.13751" title="Other formats" id="oth-2503.13751" aria-labelledby="oth-2503.13751">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Optimizing ML Training with Metagradient Descent </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Engstrom,+L">Logan Engstrom</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Ilyas,+A">Andrew Ilyas</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Chen,+B">Benjamin Chen</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Feldmann,+A">Axel Feldmann</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Moses,+W">William Moses</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Madry,+A">Aleksander Madry</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> A major challenge in training large-scale machine learning models is configuring the training process to maximize model performance, i.e., finding the best training setup from a vast design space. In this work, we unlock a gradient-based approach to this problem. 
We first introduce an algorithm for efficiently calculating metagradients -- gradients through model training -- at scale. We then introduce a "smooth model training" framework that enables effective optimization using metagradients. With metagradient descent (MGD), we greatly improve on existing dataset selection methods, outperform accuracy-degrading data poisoning attacks by an order of magnitude, and automatically find competitive learning rate schedules. </p> </div> </dd> <dt> <a name='item124'>[124]</a> <a href ="/abs/2503.13786" title="Abstract" id="2503.13786"> arXiv:2503.13786 </a> (cross-list from cs.SE) [<a href="/pdf/2503.13786" title="Download PDF" id="pdf-2503.13786" aria-labelledby="pdf-2503.13786">pdf</a>, <a href="/format/2503.13786" title="Other formats" id="oth-2503.13786" aria-labelledby="oth-2503.13786">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Evaluating the Application of SOLID Principles in Modern AI Framework Architectures </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shrestha,+J">Jonesh Shrestha</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5 pages, 1 figure, 12 references </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Software Engineering (cs.SE)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> This research evaluates the extent to which modern AI frameworks, specifically TensorFlow and scikit-learn, adhere to the SOLID design principles - Single Responsibility, Open/Closed, Liskov Substitution, Interface Segregation, and Dependency Inversion. Analyzing the frameworks' architectural documentation and design philosophies, this research investigates architectural trade-offs when balancing software engineering best practices with AI-specific needs. I examined each framework's documentation, source code, and architectural components to evaluate their adherence to these principles. The results show that both frameworks adopt certain aspects of SOLID design principles but make intentional trade-offs to address performance, scalability, and the experimental nature of AI development. TensorFlow focuses on performance and scalability, sometimes sacrificing strict adherence to principles like Single Responsibility and Interface Segregation. scikit-learn's design philosophy, by contrast, aligns more closely with SOLID principles through consistent interfaces and composition, with occasional deviations for performance optimizations and scalability. This research discovered that applying SOLID principles in AI frameworks depends on context, as performance, scalability, and flexibility often require deviations from traditional software engineering principles. This research contributes to understanding how domain-specific constraints influence architectural decisions in modern AI frameworks and how these frameworks strategically adapted design choices to effectively balance these conflicting requirements. 
</p> </div> </dd> <dt> <a name='item125'>[125]</a> <a href ="/abs/2503.13791" title="Abstract" id="2503.13791"> arXiv:2503.13791 </a> (cross-list from stat.ML) [<a href="/pdf/2503.13791" title="Download PDF" id="pdf-2503.13791" aria-labelledby="pdf-2503.13791">pdf</a>, <a href="/format/2503.13791" title="Other formats" id="oth-2503.13791" aria-labelledby="oth-2503.13791">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ROCK: A variational formulation for occupation kernel methods in Reproducing Kernel Hilbert Spaces </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Rielly,+V">Victor Rielly</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Lahouel,+K">Kamel Lahouel</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Nguyen,+C">Chau Nguyen</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Jedynak,+B">Bruno Jedynak</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> We present a Representer Theorem result for a large class of weak formulation problems. We provide examples of applications of our formulation both in traditional machine learning and numerical methods as well as in new and emerging techniques. Finally we apply our formulation to generalize the multivariate occupation kernel (MOCK) method for learning dynamical systems from data proposing the more general Riesz Occupation Kernel (ROCK) method. Our generalized methods are both more computationally efficient and performant on most of the benchmarks we test against. </p> </div> </dd> <dt> <a name='item126'>[126]</a> <a href ="/abs/2503.13805" title="Abstract" id="2503.13805"> arXiv:2503.13805 </a> (cross-list from cs.CV) [<a href="/pdf/2503.13805" title="Download PDF" id="pdf-2503.13805" aria-labelledby="pdf-2503.13805">pdf</a>, <a href="https://arxiv.org/html/2503.13805v1" title="View HTML" id="html-2503.13805" aria-labelledby="html-2503.13805" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13805" title="Other formats" id="oth-2503.13805" aria-labelledby="oth-2503.13805">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Text-Guided Image Invariant Feature Learning for Robust Image Watermarking </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ahtesham,+M">Muhammad Ahtesham</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhong,+X">Xin Zhong</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG); Multimedia (cs.MM) </div> <p class='mathjax'> Ensuring robustness in image watermarking is crucial for maintaining content integrity under diverse transformations. Recent self-supervised learning (SSL) approaches, such as DINO, have been leveraged for watermarking but primarily focus on general feature representation rather than explicitly learning invariant features. In this work, we propose a novel text-guided invariant feature learning framework for robust image watermarking. Our approach leverages CLIP's multimodal capabilities, using text embeddings as stable semantic anchors to enforce feature invariance under distortions. 
We evaluate the proposed method across multiple datasets, demonstrating superior robustness against various image transformations. Compared to state-of-the-art SSL methods, our model achieves higher cosine similarity in feature consistency tests and outperforms existing watermarking schemes in extraction accuracy under severe distortions. These results highlight the efficacy of our method in learning invariant representations tailored for robust deep learning-based watermarking. </p> </div> </dd> <dt> <a name='item127'>[127]</a> <a href ="/abs/2503.13817" title="Abstract" id="2503.13817"> arXiv:2503.13817 </a> (cross-list from cs.AI) [<a href="/pdf/2503.13817" title="Download PDF" id="pdf-2503.13817" aria-labelledby="pdf-2503.13817">pdf</a>, <a href="https://arxiv.org/html/2503.13817v1" title="View HTML" id="html-2503.13817" aria-labelledby="html-2503.13817" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13817" title="Other formats" id="oth-2503.13817" aria-labelledby="oth-2503.13817">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> VARP: Reinforcement Learning from Vision-Language Model Feedback with Agent Regularized Preferences </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Singh,+A">Anukriti Singh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bhaskar,+A">Amisha Bhaskar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+P">Peihong Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chakraborty,+S">Souradip Chakraborty</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dasyam,+R">Ruthwik Dasyam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bedi,+A">Amrit Bedi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tokekar,+P">Pratap Tokekar</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Human-Computer Interaction (cs.HC); Machine Learning (cs.LG); Robotics (cs.RO) </div> <p class='mathjax'> Designing reward functions for continuous-control robotics often leads to subtle misalignments or reward hacking, especially in complex tasks. Preference-based RL mitigates some of these pitfalls by learning rewards from comparative feedback rather than hand-crafted signals, yet scaling human annotations remains challenging. Recent work uses Vision-Language Models (VLMs) to automate preference labeling, but a single final-state image generally fails to capture the agent's full motion. In this paper, we present a two-part solution that both improves feedback accuracy and better aligns reward learning with the agent's policy. First, we overlay trajectory sketches on final observations to reveal the path taken, allowing VLMs to provide more reliable preferences, improving preference accuracy by approximately 15-20% in metaworld tasks. Second, we regularize reward learning by incorporating the agent's performance, ensuring that the reward model is optimized based on data generated by the current policy; this addition boosts episode returns by 20-30% in locomotion tasks. Empirical studies on metaworld demonstrate that our method achieves around a 70-80% success rate across all tasks, compared to below 50% for standard approaches.
These results underscore the efficacy of combining richer visual representations with agent-aware reward regularization. </p> </div> </dd> <dt> <a name='item128'>[128]</a> <a href ="/abs/2503.13833" title="Abstract" id="2503.13833"> arXiv:2503.13833 </a> (cross-list from cond-mat.mtrl-sci) [<a href="/pdf/2503.13833" title="Download PDF" id="pdf-2503.13833" aria-labelledby="pdf-2503.13833">pdf</a>, <a href="/format/2503.13833" title="Other formats" id="oth-2503.13833" aria-labelledby="oth-2503.13833">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Causal Discovery from Data Assisted by Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Barakati,+K">Kamyar Barakati</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Molak,+A">Alexander Molak</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Nelson,+C">Chris Nelson</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Zhang,+X">Xiaohang Zhang</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Takeuchi,+I">Ichiro Takeuchi</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Kalinin,+S+V">Sergei V. Kalinin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 12 pages, 5 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Materials Science (cond-mat.mtrl-sci)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Knowledge-driven discovery of novel materials necessitates the development of causal models for property emergence. While in the classical physical paradigm causal relationships are deduced from physical principles or via experiment, the rapid accumulation of observational data necessitates learning causal relationships between dissimilar aspects of material structure and functionality directly from observations. For this, it is essential to integrate experimental data with prior domain knowledge. Here we demonstrate this approach by combining high-resolution scanning transmission electron microscopy (STEM) data with insights derived from large language models (LLMs). By fine-tuning ChatGPT on domain-specific literature, such as arXiv papers on ferroelectrics, and combining the obtained information with data-driven causal discovery, we construct adjacency matrices for Directed Acyclic Graphs (DAGs) that map the causal relationships between structural, chemical, and polarization degrees of freedom in Sm-doped BiFeO3 (SmBFO). This approach enables us to hypothesize how synthesis conditions influence material properties, particularly the coercive field (E0), and guides experimental validation. The ultimate objective of this work is to develop a unified framework that integrates LLM-driven literature analysis with data-driven discovery, facilitating the precise engineering of ferroelectric materials by establishing clear connections between synthesis conditions and their resulting material properties.
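For readers unfamiliar with the representation, the adjacency-matrix encoding of a DAG is easy to sketch; the variables and edges below are purely illustrative placeholders, not the paper's findings about SmBFO.
<pre>
# Illustrative only: placeholder variables and edges, not the paper's DAG.
# A[i][j] = 1 encodes "variable i causes variable j".
import networkx as nx
import numpy as np

variables = ["doping", "strain", "domains", "polarization", "coercive_field"]
A = np.array([
    [0, 1, 1, 0, 0],   # doping -> strain, domains
    [0, 0, 1, 1, 0],   # strain -> domains, polarization
    [0, 0, 0, 1, 1],   # domains -> polarization, coercive field
    [0, 0, 0, 0, 1],   # polarization -> coercive field
    [0, 0, 0, 0, 0],
])

G = nx.relabel_nodes(nx.from_numpy_array(A, create_using=nx.DiGraph),
                     dict(enumerate(variables)))
assert nx.is_directed_acyclic_graph(G)  # causal graphs must be acyclic
</pre>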
</p> </div> </dd> <dt> <a name='item129'>[129]</a> <a href ="/abs/2503.13836" title="Abstract" id="2503.13836"> arXiv:2503.13836 </a> (cross-list from cs.CV) [<a href="/pdf/2503.13836" title="Download PDF" id="pdf-2503.13836" aria-labelledby="pdf-2503.13836">pdf</a>, <a href="https://arxiv.org/html/2503.13836v1" title="View HTML" id="html-2503.13836" aria-labelledby="html-2503.13836" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13836" title="Other formats" id="oth-2503.13836" aria-labelledby="oth-2503.13836">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SALAD: Skeleton-aware Latent Diffusion for Text-driven Motion Generation and Editing </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hong,+S">Seokhyeon Hong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+C">Chaelin Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yoon,+S">Serin Yoon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nam,+J">Junghyun Nam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cha,+S">Sihun Cha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Noh,+J">Junyong Noh</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> CVPR 2025; Project page <a href="https://seokhyeonhong.github.io/projects/salad/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Graphics (cs.GR); Machine Learning (cs.LG) </div> <p class='mathjax'> Text-driven motion generation has advanced significantly with the rise of denoising diffusion models. However, previous methods often oversimplify representations for the skeletal joints, temporal frames, and textual words, limiting their ability to fully capture the information within each modality and their interactions. Moreover, when using pre-trained models for downstream tasks, such as editing, they typically require additional effort, including manual interventions, optimization, or fine-tuning. In this paper, we introduce skeleton-aware latent diffusion (SALAD), a model that explicitly captures the intricate inter-relationships between joints, frames, and words. Furthermore, by leveraging cross-attention maps produced during the generation process, we enable attention-based zero-shot text-driven motion editing using a pre-trained SALAD model, requiring no additional user input beyond text prompts. Our approach significantly outperforms previous methods in terms of text-motion alignment without compromising generation quality, and demonstrates practical versatility by providing diverse editing capabilities beyond generation. Code is available at the project page.
</p> </div> </dd> <dt> <a name='item130'>[130]</a> <a href ="/abs/2503.13837" title="Abstract" id="2503.13837"> arXiv:2503.13837 </a> (cross-list from cs.CL) [<a href="/pdf/2503.13837" title="Download PDF" id="pdf-2503.13837" aria-labelledby="pdf-2503.13837">pdf</a>, <a href="https://arxiv.org/html/2503.13837v1" title="View HTML" id="html-2503.13837" aria-labelledby="html-2503.13837" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13837" title="Other formats" id="oth-2503.13837" aria-labelledby="oth-2503.13837">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Self-Vocabularizing Training for Neural Machine Translation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+P">Pin-Jie Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+E">Ernie Chang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to NAACL SRW 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Past vocabulary learning techniques identify relevant vocabulary before training, relying on statistical and entropy-based assumptions that largely neglect the role of model training. Empirically, we observe that trained translation models are induced to use a byte-pair encoding (BPE) vocabulary subset distinct from the original BPE vocabulary, leading to performance improvements when retrained with the induced vocabulary. In this paper, we analyze this discrepancy in neural machine translation by examining vocabulary and entropy shifts during self-training, where each iteration generates a labeled dataset by pairing source sentences with the model's predictions, from which a new vocabulary is defined. Building on these insights, we propose self-vocabularizing training, an iterative method that self-selects a smaller, more effective vocabulary, yielding up to a 1.49 BLEU improvement. Moreover, we find that deeper model architectures lead to both an increase in unique token usage and a 6-8% reduction in vocabulary size.
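The iterative loop is simple enough to sketch at a high level; train_model, translate, and learn_bpe below are hypothetical placeholders for the reader's own NMT stack, not an API from the paper.
<pre>
# High-level sketch of iterative self-vocabularizing training
# (train_model, translate, learn_bpe are assumed helpers, not a real API).
def self_vocabularizing_training(parallel_data, vocab, rounds=3):
    model = None
    for _ in range(rounds):
        model = train_model(parallel_data, vocab)      # hypothetical trainer
        self_labeled = [(src, translate(model, src))   # pair each source with
                        for src, _ in parallel_data]   # the model's prediction
        vocab = learn_bpe(self_labeled)  # re-learn BPE on self-labeled text
        parallel_data = self_labeled     # retrain on the induced dataset
    return model, vocab
</pre>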
</p> </div> </dd> <dt> <a name='item131'>[131]</a> <a href ="/abs/2503.13844" title="Abstract" id="2503.13844"> arXiv:2503.13844 </a> (cross-list from cs.CL) [<a href="/pdf/2503.13844" title="Download PDF" id="pdf-2503.13844" aria-labelledby="pdf-2503.13844">pdf</a>, <a href="https://arxiv.org/html/2503.13844v1" title="View HTML" id="html-2503.13844" aria-labelledby="html-2503.13844" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13844" title="Other formats" id="oth-2503.13844" aria-labelledby="oth-2503.13844">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Spotting Persuasion: A Low-cost Model for Persuasion Detection in Political Ads on Social Media </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Meguellati,+E">Elyas Meguellati</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Civelli,+S">Stefano Civelli</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bernardelle,+P">Pietro Bernardelle</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sadiq,+S">Shazia Sadiq</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Demartini,+G">Gianluca Demartini</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Computers and Society (cs.CY); Machine Learning (cs.LG) </div> <p class='mathjax'> In the realm of political advertising, persuasion operates as a pivotal element within the broader framework of propaganda, exerting profound influences on public opinion and electoral outcomes. In this paper, we (1) introduce a lightweight model for persuasive text detection that achieves state-of-the-art performance in Subtask 3 of SemEval 2023 Task 3, while significantly reducing the computational resource requirements; and (2) leverage the proposed model to gain insights into political campaigning strategies on social media platforms by applying it to a real-world dataset we curated, consisting of Facebook political ads from the 2022 Australian Federal election campaign. Our study shows how subtleties can be found in persuasive political advertisements and presents a pragmatic approach to detect and analyze such strategies with limited resources, enhancing transparency in social media political campaigns. 
</p> </div> </dd> <dt> <a name='item132'>[132]</a> <a href ="/abs/2503.13858" title="Abstract" id="2503.13858"> arXiv:2503.13858 </a> (cross-list from cs.CV) [<a href="/pdf/2503.13858" title="Download PDF" id="pdf-2503.13858" aria-labelledby="pdf-2503.13858">pdf</a>, <a href="https://arxiv.org/html/2503.13858v1" title="View HTML" id="html-2503.13858" aria-labelledby="html-2503.13858" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13858" title="Other formats" id="oth-2503.13858" aria-labelledby="oth-2503.13858">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MamBEV: Enabling State Space Models to Learn Birds-Eye-View Representations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ke,+H">Hongyu Ke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Morris,+J">Jack Morris</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Oguchi,+K">Kentaro Oguchi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+X">Xiaofei Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Y">Yongkang Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Haoxin Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ding,+Y">Yi Ding</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> ICLR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> 3D visual perception tasks, such as 3D detection from multi-camera images, are essential components of autonomous driving and assistance systems. However, designing computationally efficient methods remains a significant challenge. In this paper, we propose a Mamba-based framework called MamBEV, which learns unified Bird's Eye View (BEV) representations using linear spatio-temporal SSM-based attention. This approach supports multiple 3D perception tasks with significantly improved computational and memory efficiency. Furthermore, we introduce an SSM-based cross-attention mechanism, analogous to standard cross-attention, where BEV query representations can interact with relevant image features. Extensive experiments demonstrate MamBEV's promising performance across diverse visual perception metrics, highlighting its advantages in input scaling efficiency compared to existing benchmark models.
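For reference, the standard cross-attention that the SSM-based variant is analogous to fits in a few lines (a generic sketch, not MamBEV's implementation); the paper's contribution is replacing this quadratic query-feature interaction with a linear SSM-based one.
<pre>
# Generic cross-attention between BEV queries and image features (sketch).
import torch.nn.functional as F

def cross_attention(bev_queries, image_feats, w_q, w_k, w_v):
    # bev_queries: (N_bev, d); image_feats: (N_img, d); w_*: (d, d)
    q, k, v = bev_queries @ w_q, image_feats @ w_k, image_feats @ w_v
    attn = F.softmax(q @ k.T / q.shape[-1] ** 0.5, dim=-1)  # (N_bev, N_img)
    return attn @ v  # each BEV query aggregates relevant image features
</pre>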
</p> </div> </dd> <dt> <a name='item133'>[133]</a> <a href ="/abs/2503.13862" title="Abstract" id="2503.13862"> arXiv:2503.13862 </a> (cross-list from cs.CV) [<a href="/pdf/2503.13862" title="Download PDF" id="pdf-2503.13862" aria-labelledby="pdf-2503.13862">pdf</a>, <a href="https://arxiv.org/html/2503.13862v1" title="View HTML" id="html-2503.13862" aria-labelledby="html-2503.13862" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13862" title="Other formats" id="oth-2503.13862" aria-labelledby="oth-2503.13862">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> HySurvPred: Multimodal Hyperbolic Embedding with Angle-Aware Hierarchical Contrastive Learning and Uncertainty Constraints for Survival Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+J">Jiaqi Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+W">Wenting Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xing,+X">Xiaohan Xing</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+S">Sean He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+X">Xiaoling Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lyu,+X">Xinheng Lyu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shen,+L">Linlin Shen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qiu,+G">Guoping Qiu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> submitted to IJCAI2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Multimodal learning that integrates histopathology images and genomic data holds great promise for cancer survival prediction. However, existing methods face key limitations: 1) They rely on multimodal mapping and metrics in Euclidean space, which cannot fully capture the hierarchical structures in histopathology (among patches from different resolutions) and genomics data (from genes to pathways). 2) They discretize survival time into independent risk intervals, which ignores its continuous and ordinal nature and fails to achieve effective optimization. 3) They treat censorship as a binary indicator, excluding censored samples from model optimization and not making full use of them. To address these challenges, we propose HySurvPred, a novel framework for survival prediction that integrates three key modules: Multimodal Hyperbolic Mapping (MHM), Angle-aware Ranking-based Contrastive Loss (ARCL) and Censor-Conditioned Uncertainty Constraint (CUC). Instead of relying on Euclidean space, we design the MHM module to explore the inherent hierarchical structures within each modality in hyperbolic space. To better integrate multimodal features in hyperbolic space, we introduce the ARCL module, which uses ranking-based contrastive learning to preserve the ordinal nature of survival time, along with the CUC module to fully explore the censored data. Extensive experiments demonstrate that our method outperforms state-of-the-art methods on five benchmark datasets. The source code is to be released. 
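The hyperbolic-mapping step rests on a standard building block worth recalling: the exponential map at the origin of the Poincaré ball, sketched below (generic formula, not the authors' code).
<pre>
# Exponential map at the origin of the Poincare ball of curvature -c:
# maps Euclidean feature vectors into hyperbolic space.
import torch

def expmap0(v, c=1.0, eps=1e-6):
    norm = v.norm(dim=-1, keepdim=True).clamp_min(eps)
    return torch.tanh(c ** 0.5 * norm) * v / (c ** 0.5 * norm)
</pre>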
</p> </div> </dd> <dt> <a name='item134'>[134]</a> <a href ="/abs/2503.13985" title="Abstract" id="2503.13985"> arXiv:2503.13985 </a> (cross-list from cs.CV) [<a href="/pdf/2503.13985" title="Download PDF" id="pdf-2503.13985" aria-labelledby="pdf-2503.13985">pdf</a>, <a href="https://arxiv.org/html/2503.13985v1" title="View HTML" id="html-2503.13985" aria-labelledby="html-2503.13985" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13985" title="Other formats" id="oth-2503.13985" aria-labelledby="oth-2503.13985">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DefectFill: Realistic Defect Generation with Inpainting Diffusion Model for Visual Inspection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+J">Jaewoo Song</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Park,+D">Daemin Park</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Baek,+K">Kanghyun Baek</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+S">Sangyub Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Choi,+J">Jooyoung Choi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+E">Eunji Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yoon,+S">Sungroh Yoon</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by CVPR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Developing effective visual inspection models remains challenging due to the scarcity of defect data. While image generation models have been used to synthesize defect images, producing highly realistic defects remains difficult. We propose DefectFill, a novel method for realistic defect generation that requires only a few reference defect images. It leverages a fine-tuned inpainting diffusion model, optimized with our custom loss functions incorporating defect, object, and attention terms. It enables precise capture of detailed, localized defect features and their seamless integration into defect-free objects. Additionally, our Low-Fidelity Selection method further enhances the defect sample quality. Experiments show that DefectFill generates high-quality defect images, enabling visual inspection models to achieve state-of-the-art performance on the MVTec AD dataset. 
</p> </div> </dd> <dt> <a name='item135'>[135]</a> <a href ="/abs/2503.14002" title="Abstract" id="2503.14002"> arXiv:2503.14002 </a> (cross-list from cs.CV) [<a href="/pdf/2503.14002" title="Download PDF" id="pdf-2503.14002" aria-labelledby="pdf-2503.14002">pdf</a>, <a href="https://arxiv.org/html/2503.14002v1" title="View HTML" id="html-2503.14002" aria-labelledby="html-2503.14002" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14002" title="Other formats" id="oth-2503.14002" aria-labelledby="oth-2503.14002">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MeshFleet: Filtered and Annotated 3D Vehicle Dataset for Domain Specific Generative Modeling </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Boborzi,+D">Damian Boborzi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mueller,+P">Phillip Mueller</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Emrich,+J">Jonas Emrich</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schmid,+D">Dominik Schmid</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mueller,+S">Sebastian Mueller</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mikelsons,+L">Lars Mikelsons</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Generative models have recently made remarkable progress in the field of 3D objects. However, their practical application in fields like engineering remains limited since they fail to deliver the accuracy, quality, and controllability needed for domain-specific tasks. Fine-tuning large generative models is a promising route to making them usable in such fields. Creating high-quality, domain-specific 3D datasets is crucial for fine-tuning large generative models, yet the data filtering and annotation process remains a significant bottleneck. We present MeshFleet, a filtered and annotated 3D vehicle dataset extracted from Objaverse-XL, the most extensive publicly available collection of 3D objects. Our approach proposes a pipeline for automated data filtering based on a quality classifier. This classifier is trained on a manually labeled subset of Objaverse, incorporating DINOv2 and SigLIP embeddings, refined through caption-based analysis and uncertainty estimation. We demonstrate the efficacy of our filtering method through a comparative analysis against caption and image aesthetic score-based techniques and fine-tuning experiments with SV3D, highlighting the importance of targeted data selection for domain-specific 3D generative modeling. </p> </div> </dd> <dt> <a name='item136'>[136]</a> <a href ="/abs/2503.14045" title="Abstract" id="2503.14045"> arXiv:2503.14045 </a> (cross-list from stat.ML) [<a href="/pdf/2503.14045" title="Download PDF" id="pdf-2503.14045" aria-labelledby="pdf-2503.14045">pdf</a>, <a href="/format/2503.14045" title="Other formats" id="oth-2503.14045" aria-labelledby="oth-2503.14045">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Empirical risk minimization algorithm for multiclass classification of S.D.E.
paths </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Denis,+C">Christophe Denis</a> (SAMM), <a href="https://arxiv.org/search/stat?searchtype=author&query=Mintsa,+E+E">Eddy Ella Mintsa</a> (LAMA)</div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> We address the multiclass classification problem for stochastic diffusion paths, assuming that the classes are distinguished by their drift functions, while the diffusion coefficient remains common across all classes. In this setting, we propose a classification algorithm that relies on the minimization of the $L^2$ risk. We establish rates of convergence for the resulting predictor. Notably, we introduce a margin assumption under which we show that our procedure can achieve fast rates of convergence. Finally, a simulation study highlights the numerical performance of our classification algorithm. </p> </div> </dd> <dt> <a name='item137'>[137]</a> <a href ="/abs/2503.14055" title="Abstract" id="2503.14055"> arXiv:2503.14055 </a> (cross-list from math.OC) [<a href="/pdf/2503.14055" title="Download PDF" id="pdf-2503.14055" aria-labelledby="pdf-2503.14055">pdf</a>, <a href="https://arxiv.org/html/2503.14055v1" title="View HTML" id="html-2503.14055" aria-labelledby="html-2503.14055" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14055" title="Other formats" id="oth-2503.14055" aria-labelledby="oth-2503.14055">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Modular Distributed Nonconvex Learning with Error Feedback </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Carnevale,+G">Guido Carnevale</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Bastianello,+N">Nicola Bastianello</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optimization and Control (math.OC)</span>; Machine Learning (cs.LG); Systems and Control (eess.SY) </div> <p class='mathjax'> In this paper, we design a novel distributed learning algorithm using stochastic compressed communications. In detail, we pursue a modular approach, merging ADMM and a gradient-based method, benefiting from the robustness of the former and the computational efficiency of the latter. Additionally, we integrate a stochastic integral action (error feedback) enabling almost sure rejection of the compression error. We analyze the resulting method in nonconvex scenarios and guarantee almost sure asymptotic convergence to the set of stationary points of the problem. This result is obtained using system-theoretic tools based on stochastic timescale separation. We corroborate our findings with numerical simulations in nonconvex classification.
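The error-feedback (integral action) mechanism named above is standard enough to sketch; the fragment below shows the generic pattern with a top-k compressor, omitting the paper's ADMM/gradient hybrid and its stochastic analysis.
<pre>
# Generic compressed communication with error feedback (sketch).
import numpy as np

def topk(v, k):
    out = np.zeros_like(v)
    idx = np.argsort(np.abs(v))[-k:]   # keep the k largest-magnitude entries
    out[idx] = v[idx]
    return out

def compress_with_error_feedback(grad, memory, k):
    corrected = grad + memory   # inject the accumulated compression error
    msg = topk(corrected, k)    # what is actually transmitted
    memory = corrected - msg    # residual carried over to the next round
    return msg, memory
</pre>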
</p> </div> </dd> <dt> <a name='item138'>[138]</a> <a href ="/abs/2503.14084" title="Abstract" id="2503.14084"> arXiv:2503.14084 </a> (cross-list from eess.IV) [<a href="/pdf/2503.14084" title="Download PDF" id="pdf-2503.14084" aria-labelledby="pdf-2503.14084">pdf</a>, <a href="https://arxiv.org/html/2503.14084v1" title="View HTML" id="html-2503.14084" aria-labelledby="html-2503.14084" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14084" title="Other formats" id="oth-2503.14084" aria-labelledby="oth-2503.14084">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Semantic Communication in Dynamic Channel Scenarios: Collaborative Optimization of Dual-Pipeline Joint Source-Channel Coding and Personalized Federated Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Yan,+X">Xingrun Yan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zuo,+S">Shiyuan Zuo</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Lyu,+Y">Yifeng Lyu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Fan,+R">Rongfei Fan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Hu,+H">Han Hu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Semantic communication is designed to tackle issues like bandwidth constraints and high latency in communication systems. However, in complex network topologies with multiple users, the enormous combinations of client data and channel state information (CSI) pose significant challenges for existing semantic communication architectures. To improve the generalization ability of semantic communication models in complex scenarios while meeting the personalized needs of each user in their local environments, we propose a novel personalized federated learning framework with dual-pipeline joint source-channel coding based on a channel-awareness model (PFL-DPJSCCA). Within this framework, we present a method that achieves zero optimization gap for non-convex loss functions. Experiments conducted under varying SNR distributions validate the outstanding performance of our framework across diverse datasets.
</p> </div> </dd> <dt> <a name='item139'>[139]</a> <a href ="/abs/2503.14095" title="Abstract" id="2503.14095"> arXiv:2503.14095 </a> (cross-list from physics.ao-ph) [<a href="/pdf/2503.14095" title="Download PDF" id="pdf-2503.14095" aria-labelledby="pdf-2503.14095">pdf</a>, <a href="https://arxiv.org/html/2503.14095v1" title="View HTML" id="html-2503.14095" aria-labelledby="html-2503.14095" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14095" title="Other formats" id="oth-2503.14095" aria-labelledby="oth-2503.14095">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Location-Specific Precipitation Projections Using Deep Neural Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Kumar,+B">Bipin Kumar</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Yadav,+B+K">Bhvisy Kumar Yadav</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Mukhopadhyay,+S">Soumypdeep Mukhopadhyay</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Rohan,+R">Rakshit Rohan</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Singh,+B+B">Bhupendra Bahadur Singh</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Chattopadhyay,+R">Rajib Chattopadhyay</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Chilukoti,+N">Nagraju Chilukoti</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Sahai,+A+K">Atul Kumar Sahai</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 21 pages, 9 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Atmospheric and Oceanic Physics (physics.ao-ph)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Accurate precipitation estimates at individual locations are crucial for weather forecasting and spatial analysis. This study presents a paradigm shift by leveraging Deep Neural Networks (DNNs) to surpass traditional methods like Kriging for station-specific precipitation approximation. We propose two innovative NN architectures: one utilizing precipitation, elevation, and location, and another incorporating additional meteorological parameters like humidity, temperature, and wind speed. Trained on a vast dataset (1980-2019), these models outperform Kriging across various evaluation metrics (correlation coefficient, root mean square error, bias, and skill score) on a five-year validation set. This compelling evidence demonstrates the transformative power of deep learning for spatial prediction, offering a robust and precise alternative for station-specific precipitation estimation. 
</p> </div> </dd> <dt> <a name='item140'>[140]</a> <a href ="/abs/2503.14118" title="Abstract" id="2503.14118"> arXiv:2503.14118 </a> (cross-list from cond-mat.mtrl-sci) [<a href="/pdf/2503.14118" title="Download PDF" id="pdf-2503.14118" aria-labelledby="pdf-2503.14118">pdf</a>, <a href="https://arxiv.org/html/2503.14118v1" title="View HTML" id="html-2503.14118" aria-labelledby="html-2503.14118" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14118" title="Other formats" id="oth-2503.14118" aria-labelledby="oth-2503.14118">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PET-MAD, a universal interatomic potential for advanced materials modeling </div> <div class='list-authors'><a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Mazitov,+A">Arslan Mazitov</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Bigi,+F">Filippo Bigi</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Kellner,+M">Matthias Kellner</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Pegolo,+P">Paolo Pegolo</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Tisi,+D">Davide Tisi</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Fraux,+G">Guillaume Fraux</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Pozdnyakov,+S">Sergey Pozdnyakov</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Loche,+P">Philip Loche</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Ceriotti,+M">Michele Ceriotti</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Materials Science (cond-mat.mtrl-sci)</span>; Machine Learning (cs.LG); Chemical Physics (physics.chem-ph) </div> <p class='mathjax'> Machine-learning interatomic potentials (MLIPs) have greatly extended the reach of atomic-scale simulations, offering the accuracy of first-principles calculations at a fraction of the effort. Leveraging large quantum mechanical databases and expressive architectures, recent "universal" models deliver qualitative accuracy across the periodic table but are often biased toward low-energy configurations. We introduce PET-MAD, a generally applicable MLIP trained on a dataset combining stable inorganic and organic solids, systematically modified to enhance atomic diversity. Using a moderate but highly-consistent level of electronic-structure theory, we assess PET-MAD's accuracy on established benchmarks and advanced simulations of six materials. PET-MAD rivals state-of-the-art MLIPs for inorganic solids, while also being reliable for molecules, organic materials, and surfaces. It is stable and fast, enabling, out-of-the-box, the near-quantitative study of thermal and quantum mechanical fluctuations, functional properties, and phase transitions. It can be efficiently fine-tuned to deliver full quantum mechanical accuracy with a minimal number of targeted calculations. 
</p> </div> </dd> <dt> <a name='item141'>[141]</a> <a href ="/abs/2503.14121" title="Abstract" id="2503.14121"> arXiv:2503.14121 </a> (cross-list from stat.ML) [<a href="/pdf/2503.14121" title="Download PDF" id="pdf-2503.14121" aria-labelledby="pdf-2503.14121">pdf</a>, <a href="/format/2503.14121" title="Other formats" id="oth-2503.14121" aria-labelledby="oth-2503.14121">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Fundamental Limits of Matrix Sensing: Exact Asymptotics, Universality, and Applications </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Xu,+Y">Yizhou Xu</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Maillard,+A">Antoine Maillard</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Zdeborov%C3%A1,+L">Lenka Zdeborová</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Krzakala,+F">Florent Krzakala</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Disordered Systems and Neural Networks (cond-mat.dis-nn); Information Theory (cs.IT); Machine Learning (cs.LG); Probability (math.PR) </div> <p class='mathjax'> In the matrix sensing problem, one wishes to reconstruct a matrix from (possibly noisy) observations of its linear projections along given directions. We consider this model in the high-dimensional limit: while previous works on this model primarily focused on the recovery of low-rank matrices, we consider in this work more general classes of structured signal matrices with potentially large rank, e.g. a product of two matrices of sizes proportional to the dimension. We provide rigorous asymptotic equations characterizing the Bayes-optimal learning performance from a number of samples which is proportional to the number of entries in the matrix. Our proof is composed of three key ingredients: $(i)$ we prove universality properties to handle structured sensing matrices, related to the ''Gaussian equivalence'' phenomenon in statistical learning, $(ii)$ we provide a sharp characterization of Bayes-optimal learning in generalized linear models with Gaussian data and structured matrix priors, generalizing previously studied settings, and $(iii)$ we leverage previous works on the problem of matrix denoising. The generality of our results allows for a variety of applications: notably, we mathematically establish predictions obtained via non-rigorous methods from statistical physics in [ETB+24] regarding Bilinear Sequence Regression, a benchmark model for learning from sequences of tokens, and in [MTM+24] on Bayes-optimal learning in neural networks with quadratic activation function, and width proportional to the dimension.
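One common formalization of this observation model (notation ours; the paper's scaling conventions may differ) is $$y_\mu = \frac{1}{\sqrt{d}}\,\operatorname{Tr}\!\left(A_\mu^\top S^\star\right) + \sqrt{\Delta}\, z_\mu, \qquad \mu = 1, \dots, n,$$ with signal matrix $S^\star \in \mathbb{R}^{d \times d}$, sensing matrices $A_\mu$, Gaussian noise $z_\mu \sim \mathcal{N}(0,1)$, and a number of samples $n$ proportional to the number of entries $d^2$, matching the sampling regime analyzed in the abstract.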
</p> </div> </dd> <dt> <a name='item142'>[142]</a> <a href ="/abs/2503.14192" title="Abstract" id="2503.14192"> arXiv:2503.14192 </a> (cross-list from astro-ph.IM) [<a href="/pdf/2503.14192" title="Download PDF" id="pdf-2503.14192" aria-labelledby="pdf-2503.14192">pdf</a>, <a href="https://arxiv.org/html/2503.14192v1" title="View HTML" id="html-2503.14192" aria-labelledby="html-2503.14192" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14192" title="Other formats" id="oth-2503.14192" aria-labelledby="oth-2503.14192">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Strategic White Paper on AI Infrastructure for Particle, Nuclear, and Astroparticle Physics: Insights from JENA and EuCAIF </div> <div class='list-authors'><a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Caron,+S">Sascha Caron</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Ipp,+A">Andreas Ipp</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Aarts,+G">Gert Aarts</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=B%C3%ADr%C3%B3,+G">Gábor Bíró</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Bonacorsi,+D">Daniele Bonacorsi</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Cuoco,+E">Elena Cuoco</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Doglioni,+C">Caterina Doglioni</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Dorigo,+T">Tommaso Dorigo</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Pardi%C3%B1as,+J+G">Julián García Pardiñas</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Giagu,+S">Stefano Giagu</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Golling,+T">Tobias Golling</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Heinrich,+L">Lukas Heinrich</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Heng,+I+S">Ik Siong Heng</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Isar,+P+G">Paula Gina Isar</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Potamianos,+K">Karolos Potamianos</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Teodorescu,+L">Liliana Teodorescu</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Veitch,+J">John Veitch</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Vischia,+P">Pietro Vischia</a>, <a href="https://arxiv.org/search/astro-ph?searchtype=author&query=Weniger,+C">Christoph Weniger</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 19 pages, 5 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Instrumentation and Methods for Astrophysics (astro-ph.IM)</span>; High Energy Astrophysical Phenomena (astro-ph.HE); Artificial Intelligence (cs.AI); Machine Learning (cs.LG); High Energy Physics - Experiment (hep-ex); High Energy Physics - Phenomenology (hep-ph); Nuclear Theory (nucl-th) </div> <p class='mathjax'> Artificial intelligence (AI) is transforming scientific research, with deep learning methods playing a central role in data analysis, simulations, and signal detection across particle, nuclear, and astroparticle physics.
Within the JENA communities (ECFA, NuPECC, and APPEC) and as part of the EuCAIF initiative, AI integration is advancing steadily. However, broader adoption remains constrained by challenges such as limited computational resources, a lack of expertise, and difficulties in transitioning from research and development (R&D) to production. This white paper provides a strategic roadmap, informed by a community survey, to address these barriers. It outlines critical infrastructure requirements, prioritizes training initiatives, and proposes funding strategies to scale AI capabilities across fundamental physics over the next five years. </p> </div> </dd> <dt> <a name='item143'>[143]</a> <a href ="/abs/2503.14213" title="Abstract" id="2503.14213"> arXiv:2503.14213 </a> (cross-list from cs.IR) [<a href="/pdf/2503.14213" title="Download PDF" id="pdf-2503.14213" aria-labelledby="pdf-2503.14213">pdf</a>, <a href="https://arxiv.org/html/2503.14213v1" title="View HTML" id="html-2503.14213" aria-labelledby="html-2503.14213" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14213" title="Other formats" id="oth-2503.14213" aria-labelledby="oth-2503.14213">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Rolling Forward: Enhancing LightGCN with Causal Graph Convolution for Credit Bond Recommendation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ghiye,+A">Ashraf Ghiye</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barreau,+B">Baptiste Barreau</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Carlier,+L">Laurent Carlier</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vazirgiannis,+M">Michalis Vazirgiannis</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, published in the international conference for AI in Finance (ACM ICAIF'24) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Retrieval (cs.IR)</span>; Machine Learning (cs.LG); Computational Finance (q-fin.CP) </div> <p class='mathjax'> Graph Neural Networks have significantly advanced research in recommender systems over the past few years. These methods typically capture global interests using aggregated past interactions and rely on static embeddings of users and items over extended periods of time. While effective in some domains, these methods fall short in many real-world scenarios, especially in finance, where user interests and item popularity evolve rapidly over time. To address these challenges, we introduce a novel extension to Light Graph Convolutional Network (LightGCN) designed to learn temporal node embeddings that capture dynamic interests. Our approach employs causal convolution to maintain a forward-looking model architecture. By preserving the chronological order of user-item interactions and introducing a dynamic update mechanism for embeddings through a sliding window, the proposed model generates well-timed and contextually relevant recommendations. Extensive experiments on a real-world dataset from BNP Paribas demonstrate that our approach significantly enhances the performance of LightGCN while maintaining the simplicity and efficiency of its architecture. Our findings provide new insights into designing graph-based recommender systems in time-sensitive applications, particularly for financial product recommendations.
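The causal convolution named above is a standard building block: the output at time t sees only inputs up to time t, enforced by left-padding. A generic sketch (ours, not the paper's code) follows.
<pre>
# Generic causal 1D convolution over a time-ordered sequence (sketch).
import torch.nn as nn
import torch.nn.functional as F

class CausalConv1d(nn.Module):
    def __init__(self, channels, kernel_size):
        super().__init__()
        self.pad = kernel_size - 1
        self.conv = nn.Conv1d(channels, channels, kernel_size)

    def forward(self, x):                 # x: (batch, channels, time)
        x = F.pad(x, (self.pad, 0))       # pad the past only, never the future
        return self.conv(x)
</pre>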
</p> </div> </dd> <dt> <a name='item144'>[144]</a> <a href ="/abs/2503.14231" title="Abstract" id="2503.14231"> arXiv:2503.14231 </a> (cross-list from cs.CV) [<a href="/pdf/2503.14231" title="Download PDF" id="pdf-2503.14231" aria-labelledby="pdf-2503.14231">pdf</a>, <a href="https://arxiv.org/html/2503.14231v1" title="View HTML" id="html-2503.14231" aria-labelledby="html-2503.14231" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14231" title="Other formats" id="oth-2503.14231" aria-labelledby="oth-2503.14231">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi-task Learning for Identification of Porcelain in Song and Yuan Dynasties </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ling,+Z">Ziyao Ling</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Delnevo,+G">Giovanni Delnevo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Salomoni,+P">Paola Salomoni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mirri,+S">Silvia Mirri</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Chinese porcelain holds immense historical and cultural value, making its accurate classification essential for archaeological research and cultural heritage preservation. Traditional classification methods rely heavily on expert analysis, which is time-consuming, subjective, and difficult to scale. This paper explores the application of deep learning (DL) and transfer learning techniques to automate the classification of porcelain artifacts across four key attributes: dynasty, glaze, ware, and type. We evaluate four Convolutional Neural Networks (CNNs) - ResNet50, MobileNetV2, VGG16, and InceptionV3 - comparing their performance with and without pre-trained weights. Our results demonstrate that transfer learning significantly enhances classification accuracy, particularly for complex tasks like type classification, where models trained from scratch exhibit lower performance. MobileNetV2 and ResNet50 consistently achieve high accuracy and robustness across all tasks, while VGG16 struggles with more diverse classifications. We further discuss the impact of dataset limitations and propose future directions, including domain-specific pre-training, integration of attention mechanisms, explainable AI methods, and generalization to other cultural artifacts.
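The four-attribute setup lends itself to a shared pretrained backbone with one head per attribute; below is a hedged sketch (class counts are placeholders, not the paper's).
<pre>
# Transfer-learning sketch: pretrained ResNet50 backbone, one linear head
# per porcelain attribute (class counts are illustrative assumptions).
import torch.nn as nn
from torchvision import models

backbone = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
backbone.fc = nn.Identity()  # expose the 2048-d pooled features

heads = nn.ModuleDict({
    "dynasty": nn.Linear(2048, 2),
    "glaze": nn.Linear(2048, 5),
    "ware": nn.Linear(2048, 10),
    "type": nn.Linear(2048, 20),
})

def classify(x):
    feats = backbone(x)
    return {name: head(feats) for name, head in heads.items()}
</pre>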
</p> </div> </dd> <dt> <a name='item145'>[145]</a> <a href ="/abs/2503.14232" title="Abstract" id="2503.14232"> arXiv:2503.14232 </a> (cross-list from cs.CV) [<a href="/pdf/2503.14232" title="Download PDF" id="pdf-2503.14232" aria-labelledby="pdf-2503.14232">pdf</a>, <a href="https://arxiv.org/html/2503.14232v1" title="View HTML" id="html-2503.14232" aria-labelledby="html-2503.14232" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14232" title="Other formats" id="oth-2503.14232" aria-labelledby="oth-2503.14232">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CRCE: Coreference-Retention Concept Erasure in Text-to-Image Diffusion Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xue,+Y">Yuyang Xue</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Moroshko,+E">Edward Moroshko</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+F">Feng Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=McDonagh,+S">Steven McDonagh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tsaftaris,+S+A">Sotirios A. Tsaftaris</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Machine Learning (cs.LG) </div> <p class='mathjax'> Text-to-Image diffusion models can produce undesirable content that necessitates concept erasure techniques. However, existing methods struggle with under-erasure, leaving residual traces of targeted concepts, or over-erasure, mistakenly eliminating unrelated but visually similar concepts. To address these limitations, we introduce CRCE, a novel concept erasure framework that leverages Large Language Models to identify both semantically related concepts that should be erased alongside the target and distinct concepts that should be preserved. By explicitly modeling coreferential and retained concepts semantically, CRCE enables more precise concept removal, without unintended erasure. Experiments demonstrate that CRCE outperforms existing methods on diverse erasure tasks. 
</p> </div> </dd> <dt> <a name='item146'>[146]</a> <a href ="/abs/2503.14253" title="Abstract" id="2503.14253"> arXiv:2503.14253 </a> (cross-list from q-bio.QM) [<a href="/pdf/2503.14253" title="Download PDF" id="pdf-2503.14253" aria-labelledby="pdf-2503.14253">pdf</a>, <a href="https://arxiv.org/html/2503.14253v1" title="View HTML" id="html-2503.14253" aria-labelledby="html-2503.14253" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14253" title="Other formats" id="oth-2503.14253" aria-labelledby="oth-2503.14253">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CINNAMON: A hybrid approach to change point detection and parameter estimation in single-particle tracking data </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Malinowski,+J">Jakub Malinowski</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Kostrzewa,+M">Marcin Kostrzewa</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Balcerek,+M">Michał Balcerek</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Tomczuk,+W">Weronika Tomczuk</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Szwabi%C5%84ski,+J">Janusz Szwabiński</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantitative Methods (q-bio.QM)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Change point detection has become an important part of the analysis of single-particle tracking data, as it allows one to identify moments at which the motion patterns of observed particles undergo significant changes. The segmentation of diffusive trajectories based on those moments may provide insight into various phenomena in soft condensed matter and biological physics. In this paper, we propose CINNAMON, a hybrid approach to classifying single-particle tracking trajectories, detecting change points within them, and estimating diffusion parameters in the segments between the change points. Our method is based on a combination of neural networks, feature-based machine learning, and statistical techniques. It has been benchmarked in the second Anomalous Diffusion Challenge. The method offers a high level of interpretability due to its analytical and feature-based components. A potential use of features from topological data analysis is also discussed.
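To make the per-segment parameter-estimation step concrete, here is an illustrative fragment (ours, not the CINNAMON pipeline) for a 2D trajectory with known change points.
<pre>
# Given change points, estimate a diffusion coefficient per segment from
# single-step mean squared displacement (2D Brownian motion: MSD = 4 D dt).
import numpy as np

def diffusion_per_segment(xy, change_points, dt):
    bounds = [0, *change_points, len(xy)]
    estimates = []
    for a, b in zip(bounds[:-1], bounds[1:]):
        steps = np.diff(xy[a:b], axis=0)            # per-frame displacements
        msd = np.mean(np.sum(steps ** 2, axis=1))   # mean squared step length
        estimates.append(msd / (4 * dt))
    return estimates
</pre>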
</p> </div> </dd> <dt> <a name='item147'>[147]</a> <a href ="/abs/2503.14260" title="Abstract" id="2503.14260"> arXiv:2503.14260 </a> (cross-list from physics.optics) [<a href="/pdf/2503.14260" title="Download PDF" id="pdf-2503.14260" aria-labelledby="pdf-2503.14260">pdf</a>, <a href="https://arxiv.org/html/2503.14260v1" title="View HTML" id="html-2503.14260" aria-labelledby="html-2503.14260" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14260" title="Other formats" id="oth-2503.14260" aria-labelledby="oth-2503.14260">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Automating Experimental Optics with Sample Efficient Machine Learning Methods </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Saha,+A">Arindam Saha</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Charoensombutamon,+B">Baramee Charoensombutamon</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Michel,+T">Thibault Michel</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Vijendran,+V">V. Vijendran</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Walker,+L">Lachlan Walker</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Furusawa,+A">Akira Furusawa</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Assad,+S+M">Syed M. Assad</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Buchler,+B+C">Ben C. Buchler</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Lam,+P+K">Ping Koy Lam</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Tranter,+A+D">Aaron D. Tranter</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optics (physics.optics)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> As free-space optical systems grow in scale and complexity, troubleshooting becomes increasingly time-consuming and, in the case of remote installations, perhaps impractical. An example of a task that is often laborious is the alignment of a high-finesse optical resonator, which is highly sensitive to the mode of the input beam. In this work, we demonstrate how machine learning can be used to achieve autonomous mode-matching of a free-space optical resonator with minimal supervision. Our approach leverages sample-efficient algorithms to reduce data requirements while maintaining a simple architecture for easy deployment. The reinforcement learning scheme that we have developed shows that automation is feasible even in systems prone to drift in experimental parameters, as may well be the case in real-world applications. 
</p> </div> </dd> <dt> <a name='item148'>[148]</a> <a href ="/abs/2503.14281" title="Abstract" id="2503.14281"> arXiv:2503.14281 </a> (cross-list from cs.CR) [<a href="/pdf/2503.14281" title="Download PDF" id="pdf-2503.14281" aria-labelledby="pdf-2503.14281">pdf</a>, <a href="https://arxiv.org/html/2503.14281v1" title="View HTML" id="html-2503.14281" aria-labelledby="html-2503.14281" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14281" title="Other formats" id="oth-2503.14281" aria-labelledby="oth-2503.14281">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> XOXO: Stealthy Cross-Origin Context Poisoning Attacks against AI Coding Assistants </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=%C5%A0torek,+A">Adam Štorek</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gupta,+M">Mukur Gupta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bhatt,+N">Noopur Bhatt</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gupta,+A">Aditya Gupta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+J">Janie Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Prashast">Prashast Srivastava</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jana,+S">Suman Jana</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Machine Learning (cs.LG); Software Engineering (cs.SE) </div> <p class='mathjax'> AI coding assistants are widely used for tasks like code generation, bug detection, and comprehension. These tools now require large and complex contexts, automatically sourced from various origins (across files, projects, and contributors), forming part of the prompt fed to underlying LLMs. This automatic context-gathering introduces new vulnerabilities, allowing attackers to subtly poison input to compromise the assistant's outputs, potentially generating vulnerable code, overlooking flaws, or introducing critical errors. We propose a novel attack, Cross-Origin Context Poisoning (XOXO), that is particularly challenging to detect as it relies on adversarial code modifications that are semantically equivalent. Traditional program analysis techniques struggle to identify these correlations since the semantics of the code remain correct, making it appear legitimate. This allows attackers to manipulate code assistants into producing incorrect outputs, including vulnerabilities or backdoors, while shifting the blame to the victim developer or tester. We introduce a novel, task-agnostic black-box attack algorithm GCGS that systematically searches the transformation space using a Cayley Graph, achieving an 83.09% attack success rate on average across five tasks and eleven models, including GPT-4o and Claude 3.5 Sonnet v2 used by many popular AI coding assistants. Furthermore, existing defenses, including adversarial fine-tuning, are ineffective against our attack, underscoring the need for new security measures in LLM-powered coding tools. 
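</p> <p class='mathjax'> A toy example of the kind of semantically equivalent code modification such an attack space is built from, sketched with Python's ast module: consistent identifier renaming changes the surface form while preserving behavior. The identifier mapping here is arbitrary, and this illustrates only the transformation family, not the GCGS search itself. </p> <pre>
# Semantics-preserving transformation: consistent identifier renaming.
# The rewritten function computes exactly the same values as the original.
import ast

src = """
def total(prices, rate):
    subtotal = sum(prices)
    return subtotal * (1 + rate)
"""

class Rename(ast.NodeTransformer):
    mapping = {"subtotal": "s0", "prices": "p0", "rate": "r0"}
    def visit_Name(self, node):
        node.id = self.mapping.get(node.id, node.id)
        return node
    def visit_arg(self, node):
        node.arg = self.mapping.get(node.arg, node.arg)
        return node

tree = Rename().visit(ast.parse(src))
print(ast.unparse(tree))  # same semantics, different surface form
</pre> <p class='mathjax'>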
</p> </div> </dd> <dt> <a name='item149'>[149]</a> <a href ="/abs/2503.14322" title="Abstract" id="2503.14322"> arXiv:2503.14322 </a> (cross-list from eess.SP) [<a href="/pdf/2503.14322" title="Download PDF" id="pdf-2503.14322" aria-labelledby="pdf-2503.14322">pdf</a>, <a href="https://arxiv.org/html/2503.14322v1" title="View HTML" id="html-2503.14322" aria-labelledby="html-2503.14322" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14322" title="Other formats" id="oth-2503.14322" aria-labelledby="oth-2503.14322">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Consumer-grade EEG-based Eye Tracking </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Afonso,+T+V">Tiago Vasconcelos Afonso</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Heinrichs,+F">Florian Heinrichs</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Data descriptor, 13 pages, 8 figures, 5 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Human-Computer Interaction (cs.HC); Machine Learning (cs.LG) </div> <p class='mathjax'> Electroencephalography-based eye tracking (EEG-ET) leverages eye movement artifacts in EEG signals as an alternative to camera-based tracking. While EEG-ET offers advantages such as robustness in low-light conditions and better integration with brain-computer interfaces, its development lags behind traditional methods, particularly in consumer-grade settings. To support research in this area, we present a dataset comprising simultaneous EEG and eye-tracking recordings from 113 participants across 116 sessions, amounting to 11 hours and 45 minutes of recordings. Data was collected using a consumer-grade EEG headset and webcam-based eye tracking, capturing eye movements under four experimental paradigms with varying complexity. The dataset enables the evaluation of EEG-ET methods across different gaze conditions and serves as a benchmark for assessing feasibility with affordable hardware. Data preprocessing includes handling of missing values and filtering to enhance usability. In addition to the dataset, code for data preprocessing and analysis is available to support reproducibility and further research. 
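</p> <p class='mathjax'> A small scipy sketch of the preprocessing steps named above: interpolating missing samples, then band-pass filtering a single EEG channel. The 256 Hz sampling rate and the 1-40 Hz band are illustrative assumptions, not the dataset's actual parameters. </p> <pre>
# Illustrative EEG preprocessing: fill missing values, then band-pass filter.
import numpy as np
from scipy.signal import butter, filtfilt

fs = 256.0                      # assumed sampling rate (Hz)
t = np.arange(0, 10, 1 / fs)
eeg = np.sin(2 * np.pi * 10 * t) + 0.3 * np.random.randn(t.size)
eeg[1000:1005] = np.nan         # simulate dropped samples

# Linear interpolation over the missing values.
idx = np.arange(eeg.size)
mask = np.isnan(eeg)
eeg[mask] = np.interp(idx[mask], idx[~mask], eeg[~mask])

# Zero-phase band-pass filter (1-40 Hz), a common EEG cleaning step.
b, a = butter(4, [1.0, 40.0], btype="bandpass", fs=fs)
filtered = filtfilt(b, a, eeg)
</pre> <p class='mathjax'>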
</p> </div> </dd> <dt> <a name='item150'>[150]</a> <a href ="/abs/2503.14345" title="Abstract" id="2503.14345"> arXiv:2503.14345 </a> (cross-list from eess.AS) [<a href="/pdf/2503.14345" title="Download PDF" id="pdf-2503.14345" aria-labelledby="pdf-2503.14345">pdf</a>, <a href="/format/2503.14345" title="Other formats" id="oth-2503.14345" aria-labelledby="oth-2503.14345">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MoonCast: High-Quality Zero-Shot Podcast Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Ju,+Z">Zeqian Ju</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yang,+D">Dongchao Yang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Yu,+J">Jianwei Yu</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Shen,+K">Kai Shen</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Leng,+Y">Yichong Leng</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Wang,+Z">Zhengtao Wang</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Tan,+X">Xu Tan</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Zhou,+X">Xinyu Zhou</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Qin,+T">Tao Qin</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Li,+X">Xiangyang Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Audio and Speech Processing (eess.AS)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Machine Learning (cs.LG); Sound (cs.SD) </div> <p class='mathjax'> Recent advances in text-to-speech synthesis have achieved notable success in generating high-quality short utterances for individual speakers. However, these systems still face challenges when extending their capabilities to long, multi-speaker, and spontaneous dialogues, typical of real-world scenarios such as podcasts. These limitations arise from two primary challenges: 1) long speech: podcasts typically span several minutes, exceeding the upper limit of most existing work; 2) spontaneity: podcasts are marked by their spontaneous, oral nature, which sharply contrasts with formal, written contexts; existing works often fall short in capturing this spontaneity. In this paper, we propose MoonCast, a solution for high-quality zero-shot podcast generation, aiming to synthesize natural podcast-style speech from text-only sources (e.g., stories, technical reports, news in TXT, PDF, or Web URL formats) using the voices of unseen speakers. To generate long audio, we adopt a long-context language model-based audio modeling approach utilizing large-scale long-context speech data. To enhance spontaneity, we utilize a podcast generation module to generate scripts with spontaneous details, which have been empirically shown to be as crucial as the text-to-speech modeling itself. Experiments demonstrate that MoonCast outperforms baselines, with particularly notable improvements in spontaneity and coherence. 
</p> </div> </dd> <dt> <a name='item151'>[151]</a> <a href ="/abs/2503.14353" title="Abstract" id="2503.14353"> arXiv:2503.14353 </a> (cross-list from eess.SP) [<a href="/pdf/2503.14353" title="Download PDF" id="pdf-2503.14353" aria-labelledby="pdf-2503.14353">pdf</a>, <a href="https://arxiv.org/html/2503.14353v1" title="View HTML" id="html-2503.14353" aria-labelledby="html-2503.14353" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14353" title="Other formats" id="oth-2503.14353" aria-labelledby="oth-2503.14353">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unified Analysis of Decentralized Gradient Descent: a Contraction Mapping Framework </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Larsson,+E+G">Erik G. Larsson</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Michelusi,+N">Nicolo Michelusi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> submitted to the IEEE Open Journal of Signal Processing </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Distributed, Parallel, and Cluster Computing (cs.DC); Machine Learning (cs.LG) </div> <p class='mathjax'> The decentralized gradient descent (DGD) algorithm, and its sibling, diffusion, are workhorses in decentralized machine learning, distributed inference and estimation, and multi-agent coordination. We propose a novel, principled framework for the analysis of DGD and diffusion for strongly convex, smooth objectives, and arbitrary undirected topologies, using contraction mappings coupled with a result called the mean Hessian theorem (MHT). The use of these tools yields tight convergence bounds, both in the noise-free and noisy regimes. While these bounds are qualitatively similar to results found in the literature, our approach using contractions together with the MHT decouples the algorithm dynamics (how quickly the algorithm converges to its fixed point) from its asymptotic convergence properties (how far the fixed point is from the global optimum). This yields a simple, intuitive analysis that is accessible to a broader audience. Extensions are provided to multiple local gradient updates, time-varying step sizes, noisy gradients (stochastic DGD and diffusion), communication noise, and random topologies. 
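</p> <p class='mathjax'> For readers new to DGD, a minimal numpy sketch of the iteration the analysis studies: each agent mixes its local iterate with neighbors through a doubly stochastic matrix $W$, then takes a gradient step on its own strongly convex quadratic. The ring topology, mixing weights, and step size are illustrative choices. </p> <pre>
# Decentralized gradient descent on f_i(x) = 0.5*||A_i x - b_i||^2 per agent.
import numpy as np

rng = np.random.default_rng(1)
n, p = 4, 3                                  # agents, dimension
A = rng.normal(size=(n, 5, p))
b = rng.normal(size=(n, 5))

# Ring topology with symmetric, doubly stochastic mixing weights.
W = np.array([[0.5, 0.25, 0.0, 0.25],
              [0.25, 0.5, 0.25, 0.0],
              [0.0, 0.25, 0.5, 0.25],
              [0.25, 0.0, 0.25, 0.5]])

X = np.zeros((n, p))                         # one local iterate per agent
alpha = 0.05                                 # constant step size
for _ in range(500):
    grads = np.stack([A[i].T @ (A[i] @ X[i] - b[i]) for i in range(n)])
    X = W @ X - alpha * grads                # mix with neighbors, then descend

print(np.std(X, axis=0))  # spread across agents shrinks toward consensus
</pre> <p class='mathjax'>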
</p> </div> </dd> <dt> <a name='item152'>[152]</a> <a href ="/abs/2503.14358" title="Abstract" id="2503.14358"> arXiv:2503.14358 </a> (cross-list from cs.CV) [<a href="/pdf/2503.14358" title="Download PDF" id="pdf-2503.14358" aria-labelledby="pdf-2503.14358">pdf</a>, <a href="https://arxiv.org/html/2503.14358v1" title="View HTML" id="html-2503.14358" aria-labelledby="html-2503.14358" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14358" title="Other formats" id="oth-2503.14358" aria-labelledby="oth-2503.14358">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> RFMI: Estimating Mutual Information on Rectified Flow for Text-to-Image Alignment </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+C">Chao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Franzese,+G">Giulio Franzese</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Finamore,+A">Alessandro Finamore</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Michiardi,+P">Pietro Michiardi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> to appear at ICLR 2025 Workshop on Deep Generative Model in Machine Learning: Theory, Principle and Efficacy </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Rectified Flow (RF) models trained with a Flow matching framework have achieved state-of-the-art performance on Text-to-Image (T2I) conditional generation. Yet, multiple benchmarks show that synthetic images can still suffer from poor alignment with the prompt, i.e., images show wrong attribute binding, subject positioning, numeracy, etc. While the literature offers many methods to improve T2I alignment, they all consider only Diffusion Models, and require auxiliary datasets, scoring models, and linguistic analysis of the prompt. In this paper we aim to address these gaps. First, we introduce RFMI, a novel Mutual Information (MI) estimator for RF models that uses the pre-trained model itself for the MI estimation. Then, we investigate a self-supervised fine-tuning approach for T2I alignment based on RFMI that does not require auxiliary information other than the pre-trained model itself. Specifically, a fine-tuning set is constructed by selecting synthetic images generated from the pre-trained RF model and having high point-wise MI between images and prompts. Our experiments on MI estimation benchmarks demonstrate the validity of RFMI, and empirical fine-tuning on SD3.5-Medium confirms the effectiveness of RFMI for improving T2I alignment while maintaining image quality. 
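</p> <p class='mathjax'> A schematic of the fine-tuning-set construction described above, where <code>generate</code> and <code>pointwise_mi</code> are hypothetical stand-ins for the pre-trained RF sampler and the RFMI estimator; the per-prompt sample count and retention fraction are arbitrary. </p> <pre>
# Select synthetic (image, prompt) pairs with high point-wise MI for fine-tuning.
# `generate` and `pointwise_mi` are hypothetical placeholders, not real APIs.
def build_finetune_set(prompts, generate, pointwise_mi, per_prompt=8, keep=0.2):
    scored = []
    for prompt in prompts:
        for _ in range(per_prompt):
            image = generate(prompt)                     # sample from the RF model
            scored.append((pointwise_mi(image, prompt), image, prompt))
    scored.sort(key=lambda s: s[0], reverse=True)        # highest MI first
    top = scored[: max(1, int(keep * len(scored)))]
    return [(img, prm) for _, img, prm in top]
</pre> <p class='mathjax'>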
</p> </div> </dd> <dt> <a name='item153'>[153]</a> <a href ="/abs/2503.14369" title="Abstract" id="2503.14369"> arXiv:2503.14369 </a> (cross-list from physics.flu-dyn) [<a href="/pdf/2503.14369" title="Download PDF" id="pdf-2503.14369" aria-labelledby="pdf-2503.14369">pdf</a>, <a href="https://arxiv.org/html/2503.14369v1" title="View HTML" id="html-2503.14369" aria-labelledby="html-2503.14369" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14369" title="Other formats" id="oth-2503.14369" aria-labelledby="oth-2503.14369">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> C(NN)FD -- Deep Learning Modelling of Multi-Stage Axial Compressors Aerodynamics </div> <div class='list-authors'><a href="https://arxiv.org/search/physics?searchtype=author&query=Bruni,+G">Giuseppe Bruni</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Maleki,+S">Sepehr Maleki</a>, <a href="https://arxiv.org/search/physics?searchtype=author&query=Krishnababu,+S+K">Senthil K Krishnababu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Fluid Dynamics (physics.flu-dyn)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> The field of scientific machine learning and its applications to numerical analyses such as CFD has recently experienced a surge in interest. While its viability has been demonstrated in different domains, it has not yet reached a level of robustness and scalability to make it practical for industrial applications in the turbomachinery field. The highly complex, turbulent, and three-dimensional flows of multi-stage axial compressors for gas turbine applications represent a remarkably challenging case. This is due to the high-dimensionality of the regression of the flow-field from geometrical and operational variables, and the high computational cost associated with the large scale of the CFD domains. This paper demonstrates the development and application of a generalized deep learning framework for predictions of the flow field and aerodynamic performance of multi-stage axial compressors, also potentially applicable to any type of turbomachinery. A physics-based dimensionality reduction unlocks the potential for flow-field predictions for large-scale domains, re-formulating the regression problem from an unstructured to a structured one. The relevant physical equations are used to define a multi-dimensional physical loss function. Compared to "black-box" approaches, the proposed framework has the advantage of physically explainable predictions of overall performance, as the corresponding aerodynamic drivers can be identified on a 0D/1D/2D/3D level. An iterative architecture is employed, improving the accuracy of the predictions, as well as estimating the associated uncertainty. The model is trained on a series of datasets including manufacturing and build variations, different geometries, compressor designs and operating conditions. This demonstrates the capability to predict the flow-field and the overall performance in a generalizable manner, with accuracy comparable to the benchmark. 
</p> </div> </dd> <dt> <a name='item154'>[154]</a> <a href ="/abs/2503.14375" title="Abstract" id="2503.14375"> arXiv:2503.14375 </a> (cross-list from cs.GR) [<a href="/pdf/2503.14375" title="Download PDF" id="pdf-2503.14375" aria-labelledby="pdf-2503.14375">pdf</a>, <a href="https://arxiv.org/html/2503.14375v1" title="View HTML" id="html-2503.14375" aria-labelledby="html-2503.14375" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14375" title="Other formats" id="oth-2503.14375" aria-labelledby="oth-2503.14375">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Evaluating Machine Learning Approaches for ASCII Art Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Coumar,+S">Sai Coumar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kingston,+Z">Zachary Kingston</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages, 7 figures, 3 tables. Code available at <a href="https://github.com/saiccoumar/deep_ascii_converter" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Graphics (cs.GR)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Generating structured ASCII art using computational techniques demands a careful interplay between aesthetic representation and computational precision, requiring models that can effectively translate visual information into symbolic text characters. Although Convolutional Neural Networks (CNNs) have shown promise in this domain, the comparative performance of deep learning architectures and classical machine learning methods remains unexplored. This paper explores the application of contemporary ML and DL methods to generate structured ASCII art, focusing on three key criteria: fidelity, character classification accuracy, and output quality. We investigate deep learning architectures, including Multilayer Perceptrons (MLPs), ResNet, and MobileNetV2, alongside classical approaches such as Random Forests, Support Vector Machines (SVMs) and k-Nearest Neighbors (k-NN), trained on an augmented synthetic dataset of ASCII characters. Our results show that complex neural network architectures often fall short in producing high-quality ASCII art, whereas classical machine learning classifiers, despite their simplicity, achieve performance similar to CNNs. Our findings highlight the strength of classical methods in bridging model simplicity with output quality, offering new insights into ASCII art synthesis and machine learning on image data with low dimensionality. 
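</p> <p class='mathjax'> A minimal sklearn sketch of the classical route evaluated above: a k-NN classifier that maps fixed-size grayscale patches to ASCII characters. The glyph training data is stubbed with synthetic arrays for brevity, and the character ramp is an arbitrary choice; in practice one would train on rendered glyph bitmaps. </p> <pre>
# k-NN patch-to-character classifier, one of the classical baselines discussed.
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

charset = list(" .:-=+*#%@")
rng = np.random.default_rng(0)
# Stand-in for rendered 8x8 glyph bitmaps (20 noisy samples per character).
X = np.vstack([rng.normal(i / len(charset), 0.05, (20, 64))
               for i in range(len(charset))])
y = np.repeat(charset, 20)

knn = KNeighborsClassifier(n_neighbors=3).fit(X, y)

def to_ascii(image, h=8, w=8):
    """Classify each non-overlapping h-by-w patch into a character."""
    rows = []
    for r in range(0, image.shape[0] - h + 1, h):
        patches = [image[r:r + h, c:c + w].reshape(-1)
                   for c in range(0, image.shape[1] - w + 1, w)]
        rows.append("".join(knn.predict(np.stack(patches))))
    return "\n".join(rows)
</pre> <p class='mathjax'>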
</p> </div> </dd> <dt> <a name='item155'>[155]</a> <a href ="/abs/2503.14377" title="Abstract" id="2503.14377"> arXiv:2503.14377 </a> (cross-list from eess.IV) [<a href="/pdf/2503.14377" title="Download PDF" id="pdf-2503.14377" aria-labelledby="pdf-2503.14377">pdf</a>, <a href="https://arxiv.org/html/2503.14377v1" title="View HTML" id="html-2503.14377" aria-labelledby="html-2503.14377" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14377" title="Other formats" id="oth-2503.14377" aria-labelledby="oth-2503.14377">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Advancing Medical Representation Learning Through High-Quality Data </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Baghbanzadeh,+N">Negin Baghbanzadeh</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Fallahpour,+A">Adibvafa Fallahpour</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Parhizkar,+Y">Yasaman Parhizkar</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Ogidi,+F">Franklin Ogidi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Roy,+S">Shuvendu Roy</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Ashkezari,+S">Sajad Ashkezari</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Khazaie,+V+R">Vahid Reza Khazaie</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Colacci,+M">Michael Colacci</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Etemad,+A">Ali Etemad</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Afkanpour,+A">Arash Afkanpour</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Dolatabadi,+E">Elham Dolatabadi</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Despite the growing scale of medical Vision-Language datasets, the impact of dataset quality on model performance remains under-explored. We introduce Open-PMC, a high-quality medical dataset from PubMed Central, containing 2.2 million image-text pairs, enriched with image modality annotations, subfigures, and summarized in-text references. Notably, the in-text references provide richer medical context, extending beyond the abstract information typically found in captions. Through extensive experiments, we benchmark Open-PMC against larger datasets across retrieval and zero-shot classification tasks. Our results show that dataset quality, not just size, drives significant performance gains. We complement our benchmark with an in-depth analysis of feature representation. Our findings highlight the crucial role of data curation quality in advancing multimodal medical AI. We release Open-PMC, along with the trained models and our codebase. 
</p> </div> </dd> <dt> <a name='item156'>[156]</a> <a href ="/abs/2503.14381" title="Abstract" id="2503.14381"> arXiv:2503.14381 </a> (cross-list from stat.ML) [<a href="/pdf/2503.14381" title="Download PDF" id="pdf-2503.14381" aria-labelledby="pdf-2503.14381">pdf</a>, <a href="/format/2503.14381" title="Other formats" id="oth-2503.14381" aria-labelledby="oth-2503.14381">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Optimizing High-Dimensional Oblique Splits </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Chi,+C">Chien-Ming Chi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 79 pages, 9 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG); Statistics Theory (math.ST); Methodology (stat.ME) </div> <p class='mathjax'> Orthogonal-split trees perform well, but evidence suggests oblique splits can enhance their performance. This paper explores optimizing high-dimensional $s$-sparse oblique splits from $\{(\vec{w}, \vec{w}^{\top}\boldsymbol{X}_{i}) : i\in \{1,\dots, n\}, \vec{w} \in \mathbb{R}^p, \| \vec{w} \|_{2} = 1, \| \vec{w} \|_{0} \leq s \}$ for growing oblique trees, where $ s $ is a user-defined sparsity parameter. We establish a connection between SID convergence and $s_0$-sparse oblique splits with $s_0\ge 1$, showing that the SID function class expands as $s_0$ increases, enabling the capture of more complex data-generating functions such as the $s_0$-dimensional XOR function. Thus, $s_0$ represents the unknown potential complexity of the underlying data-generating function. Learning these complex functions requires an $s$-sparse oblique tree with $s \geq s_0$ and greater computational resources. This highlights a trade-off between statistical accuracy, governed by the SID function class size depending on $s_0$, and computational cost. In contrast, previous studies have explored the problem of SID convergence using orthogonal splits with $ s_0 = s = 1 $, where runtime was less critical. Additionally, we introduce a practical framework for oblique trees that integrates optimized oblique splits alongside orthogonal splits into random forests. The proposed approach is assessed through simulations and real-data experiments, comparing its performance against various oblique tree models. 
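</p> <p class='mathjax'> A worked numpy example of the split family defined above: candidate unit-norm weight vectors $\vec{w}$ with at most $s$ nonzero entries are scored by the variance reduction of the thresholded projection $\vec{w}^{\top}\boldsymbol{X}_i$. The random search over supports and the quantile thresholds are illustrative devices, not the paper's optimizer. </p> <pre>
# Scoring s-sparse oblique splits by variance reduction of the projection.
import numpy as np

def variance_reduction(proj, y, t):
    left, right = y[proj <= t], y[proj > t]
    if len(left) == 0 or len(right) == 0:
        return -np.inf
    weighted = (len(left) * left.var() + len(right) * right.var()) / len(y)
    return y.var() - weighted

def best_sparse_oblique_split(X, y, s, n_candidates=200, rng=None):
    rng = rng or np.random.default_rng(0)
    n, p = X.shape
    best = (-np.inf, None, None)
    for _ in range(n_candidates):
        support = rng.choice(p, size=s, replace=False)   # ||w||_0 <= s
        w = np.zeros(p)
        w[support] = rng.normal(size=s)
        w /= np.linalg.norm(w)                           # ||w||_2 = 1
        proj = X @ w
        for t in np.quantile(proj, [0.25, 0.5, 0.75]):
            gain = variance_reduction(proj, y, t)
            if gain > best[0]:
                best = (gain, w, t)
    return best  # (variance reduction, weight vector, threshold)
</pre> <p class='mathjax'>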
</p> </div> </dd> <dt> <a name='item157'>[157]</a> <a href ="/abs/2503.14405" title="Abstract" id="2503.14405"> arXiv:2503.14405 </a> (cross-list from cs.CV) [<a href="/pdf/2503.14405" title="Download PDF" id="pdf-2503.14405" aria-labelledby="pdf-2503.14405">pdf</a>, <a href="https://arxiv.org/html/2503.14405v1" title="View HTML" id="html-2503.14405" aria-labelledby="html-2503.14405" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14405" title="Other formats" id="oth-2503.14405" aria-labelledby="oth-2503.14405">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> DUNE: Distilling a Universal Encoder from Heterogeneous 2D and 3D Teachers </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sariyildiz,+M+B">Mert Bulent Sariyildiz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Weinzaepfel,+P">Philippe Weinzaepfel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lucas,+T">Thomas Lucas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=de+Jorge,+P">Pau de Jorge</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Larlus,+D">Diane Larlus</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kalantidis,+Y">Yannis Kalantidis</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to CVPR-2025. Project page: <a href="https://europe.naverlabs.com/dune" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Recent multi-teacher distillation methods have unified the encoders of multiple foundation models into a single encoder, achieving competitive performance on core vision tasks like classification, segmentation, and depth estimation. This led us to ask: Could similar success be achieved when the pool of teachers also includes vision models specialized in diverse tasks across both 2D and 3D perception? In this paper, we define and investigate the problem of heterogeneous teacher distillation, or co-distillation, a challenging multi-teacher distillation scenario where teacher models vary significantly in both (a) their design objectives and (b) the data they were trained on. We explore data-sharing strategies and teacher-specific encoding, and introduce DUNE, a single encoder excelling in 2D vision, 3D understanding, and 3D human perception. Our model achieves performance comparable to that of its larger teachers, sometimes even outperforming them, on their respective tasks. Notably, DUNE surpasses MASt3R in Map-free Visual Relocalization with a much smaller encoder. 
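</p> <p class='mathjax'> A schematic PyTorch sketch of heterogeneous multi-teacher distillation as described: one shared student encoder with a small projection head per frozen teacher, trained to regress each teacher's features. The architecture and feature dimensions are invented for illustration and do not reflect DUNE's actual design. </p> <pre>
# Shared student encoder + per-teacher heads, matched to frozen teacher features.
import torch
import torch.nn as nn

class CoDistillStudent(nn.Module):
    def __init__(self, feat_dim=256, teacher_dims=(384, 512, 768)):
        super().__init__()
        self.encoder = nn.Sequential(nn.Flatten(),
                                     nn.Linear(3 * 32 * 32, feat_dim),
                                     nn.GELU())
        self.heads = nn.ModuleList(nn.Linear(feat_dim, d) for d in teacher_dims)

    def forward(self, x):
        z = self.encoder(x)                  # shared representation
        return [head(z) for head in self.heads]

student = CoDistillStudent()
x = torch.randn(8, 3, 32, 32)
teacher_feats = [torch.randn(8, d) for d in (384, 512, 768)]  # frozen outputs
loss = sum(nn.functional.mse_loss(p, t)
           for p, t in zip(student(x), teacher_feats))
loss.backward()
</pre> <p class='mathjax'>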
</p> </div> </dd> <dt> <a name='item158'>[158]</a> <a href ="/abs/2503.14421" title="Abstract" id="2503.14421"> arXiv:2503.14421 </a> (cross-list from cs.CV) [<a href="/pdf/2503.14421" title="Download PDF" id="pdf-2503.14421" aria-labelledby="pdf-2503.14421">pdf</a>, <a href="https://arxiv.org/html/2503.14421v1" title="View HTML" id="html-2503.14421" aria-labelledby="html-2503.14421" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14421" title="Other formats" id="oth-2503.14421" aria-labelledby="oth-2503.14421">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ExDDV: A New Dataset for Explainable Deepfake Detection in Video </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hondru,+V">Vlad Hondru</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hogea,+E">Eduard Hogea</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Onchis,+D">Darian Onchis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ionescu,+R+T">Radu Tudor Ionescu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Machine Learning (cs.LG); Multimedia (cs.MM) </div> <p class='mathjax'> The ever-growing realism and quality of generated videos make it increasingly hard for humans to spot deepfake content, forcing them to rely more and more on automatic deepfake detectors. However, deepfake detectors are also prone to errors, and their decisions are not explainable, leaving humans vulnerable to deepfake-based fraud and misinformation. To this end, we introduce ExDDV, the first dataset and benchmark for Explainable Deepfake Detection in Video. ExDDV comprises around 5.4K real and deepfake videos that are manually annotated with text descriptions (to explain the artifacts) and clicks (to point out the artifacts). We evaluate a number of vision-language models on ExDDV, performing experiments with various fine-tuning and in-context learning strategies. Our results show that text and click supervision are both required to develop robust explainable models for deepfake videos, which are able to localize and describe the observed artifacts. Our novel dataset and code to reproduce the results are available at <a href="https://github.com/vladhondru25/ExDDV" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item159'>[159]</a> <a href ="/abs/2503.14432" title="Abstract" id="2503.14432"> arXiv:2503.14432 </a> (cross-list from cs.CL) [<a href="/pdf/2503.14432" title="Download PDF" id="pdf-2503.14432" aria-labelledby="pdf-2503.14432">pdf</a>, <a href="https://arxiv.org/html/2503.14432v1" title="View HTML" id="html-2503.14432" aria-labelledby="html-2503.14432" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14432" title="Other formats" id="oth-2503.14432" aria-labelledby="oth-2503.14432">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PLAY2PROMPT: Zero-shot Tool Instruction Optimization for LLM Agents via Tool Play </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Fang,+W">Wei Fang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yang Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qian,+K">Kaizhi Qian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Glass,+J">James Glass</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+Y">Yada Zhu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Large language models (LLMs) are increasingly integrated with specialized external tools, yet many tasks demand zero-shot tool usage with minimal or noisy documentation. Existing solutions rely on manual rewriting or labeled data for validation, making them inapplicable in true zero-shot settings. To address these challenges, we propose PLAY2PROMPT, an automated framework that systematically "plays" with each tool to explore its input-output behaviors. Through this iterative trial-and-error process, PLAY2PROMPT refines tool documentation and generates usage examples without any labeled data. These examples not only guide LLM inference but also serve as validation to further enhance tool utilization. Extensive experiments on real-world tasks demonstrate that PLAY2PROMPT significantly improves zero-shot tool performance across both open and closed models, offering a scalable and effective solution for domain-specific tool integration. 
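</p> <p class='mathjax'> A schematic of the tool-play loop in Python, with <code>propose_inputs</code> and <code>summarize_doc</code> as hypothetical stand-ins for LLM calls: real executions of the tool validate proposed usage examples, which are then folded back into the documentation. Only the loop structure is taken from the abstract; the helper names are invented. </p> <pre>
# Iterative trial-and-error "tool play": execute, observe, refine the docs.
# `propose_inputs` and `summarize_doc` are hypothetical LLM-backed helpers.
def play_with_tool(tool, draft_doc, propose_inputs, summarize_doc, rounds=5):
    examples = []
    for _ in range(rounds):
        for kwargs in propose_inputs(draft_doc, examples):
            try:
                result = tool(**kwargs)              # run the real tool
                examples.append((kwargs, result))    # keep validated usage
            except Exception as err:
                examples.append((kwargs, f"error: {err}"))  # negative signal
        draft_doc = summarize_doc(draft_doc, examples)  # refine documentation
    return draft_doc, examples
</pre> <p class='mathjax'>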
</p> </div> </dd> <dt> <a name='item160'>[160]</a> <a href ="/abs/2503.14453" title="Abstract" id="2503.14453"> arXiv:2503.14453 </a> (cross-list from stat.ML) [<a href="/pdf/2503.14453" title="Download PDF" id="pdf-2503.14453" aria-labelledby="pdf-2503.14453">pdf</a>, <a href="https://arxiv.org/html/2503.14453v1" title="View HTML" id="html-2503.14453" aria-labelledby="html-2503.14453" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14453" title="Other formats" id="oth-2503.14453" aria-labelledby="oth-2503.14453">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Online Conformal Probabilistic Numerics via Adaptive Edge-Cloud Offloading </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Hou,+Q">Qiushuo Hou</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Park,+S">Sangwoo Park</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Zecchin,+M">Matteo Zecchin</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Cai,+Y">Yunlong Cai</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Yu,+G">Guanding Yu</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Simeone,+O">Osvaldo Simeone</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This paper has been submitted to a conference </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Consider an edge computing setting in which a user submits queries for the solution of a linear system to an edge processor, which is subject to time-varying computing availability. The edge processor applies a probabilistic linear solver (PLS) so as to be able to respond to the user's query within the allotted time and computing budget. Feedback to the user is in the form of an uncertainty set. Due to model misspecification, the uncertainty set obtained via a direct application of PLS does not come with coverage guarantees with respect to the true solution of the linear system. This work introduces a new method to calibrate the uncertainty sets produced by PLS with the aim of guaranteeing long-term coverage requirements. The proposed method, referred to as online conformal prediction-PLS (OCP-PLS), assumes sporadic feedback from cloud to edge. This enables the online calibration of uncertainty thresholds via online conformal prediction (OCP), an online optimization method previously studied in the context of prediction models. The validity of OCP-PLS is verified via experiments that bring insights into trade-offs between coverage, prediction set size, and cloud usage. 
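</p> <p class='mathjax'> The style of online conformal update involved, in a minimal Python sketch: the uncertainty threshold grows after each miscoverage event and shrinks otherwise, so empirical coverage tracks the target $1 - \alpha$ in the long run. The learning rate and initialization are arbitrary, and the coupling to PLS uncertainty sets is not shown. </p> <pre>
# Generic online conformal threshold recursion (not the full OCP-PLS scheme).
def ocp_threshold_updates(errors, alpha=0.1, eta=0.05, theta0=1.0):
    """errors[t] is 1 if the true solution fell outside the set at time t."""
    theta = theta0
    history = []
    for err in errors:
        theta = theta + eta * (err - alpha)   # inflate after a miss, else shrink
        history.append(theta)
    return history

print(ocp_threshold_updates([1, 0, 0, 1, 0], alpha=0.1))
</pre> <p class='mathjax'>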
</p> </div> </dd> <dt> <a name='item161'>[161]</a> <a href ="/abs/2503.14456" title="Abstract" id="2503.14456"> arXiv:2503.14456 </a> (cross-list from cs.CL) [<a href="/pdf/2503.14456" title="Download PDF" id="pdf-2503.14456" aria-labelledby="pdf-2503.14456">pdf</a>, <a href="/format/2503.14456" title="Other formats" id="oth-2503.14456" aria-labelledby="oth-2503.14456">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> RWKV-7 "Goose" with Expressive Dynamic State Evolution </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Peng,+B">Bo Peng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+R">Ruichong Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Goldstein,+D">Daniel Goldstein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Alcaide,+E">Eric Alcaide</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hou,+H">Haowen Hou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+J">Janna Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Merrill,+W">William Merrill</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Song,+G">Guangyu Song</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tan,+K">Kaifeng Tan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Utpala,+S">Saiteja Utpala</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wilce,+N">Nathan Wilce</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wind,+J+S">Johan S. Wind</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+T">Tianyi Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wuttke,+D">Daniel Wuttke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou-Zheng,+C">Christian Zhou-Zheng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> We present RWKV-7 "Goose", a new sequence modeling architecture, along with pre-trained language models that establish a new state-of-the-art in downstream performance at the 3 billion parameter scale on multilingual tasks, and match current SoTA English language performance despite being trained on dramatically fewer tokens than other top 3B models. Nevertheless, RWKV-7 models require only constant memory usage and constant inference time per token. RWKV-7 introduces a newly generalized formulation of the delta rule with vector-valued gating and in-context learning rates, as well as a relaxed value replacement rule. We show that RWKV-7 can perform state tracking and recognize all regular languages, while retaining parallelizability of training. This exceeds the capabilities of Transformers under standard complexity conjectures, which are limited to $\mathsf{TC}^0$. To demonstrate RWKV-7's language modeling capability, we also present an extended open source 3.1 trillion token multilingual corpus, and train four RWKV-7 models ranging from 0.19 billion to 2.9 billion parameters on this dataset. 
<br>To foster openness, reproduction, and adoption, we release our models and dataset component listing at <a href="https://huggingface.co/RWKV" rel="external noopener nofollow" class="link-external link-https">this https URL</a>, and our training and inference code at <a href="https://github.com/RWKV/RWKV-LM" rel="external noopener nofollow" class="link-external link-https">this https URL</a> all under the Apache 2.0 License. </p> </div> </dd> <dt> <a name='item162'>[162]</a> <a href ="/abs/2503.14459" title="Abstract" id="2503.14459"> arXiv:2503.14459 </a> (cross-list from stat.ML) [<a href="/pdf/2503.14459" title="Download PDF" id="pdf-2503.14459" aria-labelledby="pdf-2503.14459">pdf</a>, <a href="https://arxiv.org/html/2503.14459v1" title="View HTML" id="html-2503.14459" aria-labelledby="html-2503.14459" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14459" title="Other formats" id="oth-2503.14459" aria-labelledby="oth-2503.14459">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Doubly robust identification of treatment effects from multiple environments </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=De+Bartolomeis,+P">Piersilvio De Bartolomeis</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Kostin,+J">Julia Kostin</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Abad,+J">Javier Abad</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Wang,+Y">Yixin Wang</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Yang,+F">Fanny Yang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted for presentation at the International Conference on Learning Representations (ICLR) 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG); Methodology (stat.ME) </div> <p class='mathjax'> Practical and ethical constraints often require the use of observational data for causal inference, particularly in medicine and social sciences. Yet, observational datasets are prone to confounding, potentially compromising the validity of causal conclusions. While it is possible to correct for biases if the underlying causal graph is known, this is rarely a feasible ask in practical scenarios. A common strategy is to adjust for all available covariates, yet this approach can yield biased treatment effect estimates, especially when post-treatment or unobserved variables are present. We propose RAMEN, an algorithm that produces unbiased treatment effect estimates by leveraging the heterogeneity of multiple data sources without the need to know or learn the underlying causal graph. Notably, RAMEN achieves doubly robust identification: it can identify the treatment effect whenever the causal parents of the treatment or those of the outcome are observed, and the node whose parents are observed satisfies an invariance assumption. Empirical evaluations on synthetic and real-world datasets show that our approach outperforms existing methods. 
</p> </div> </dd> <dt> <a name='item163'>[163]</a> <a href ="/abs/2503.14473" title="Abstract" id="2503.14473"> arXiv:2503.14473 </a> (cross-list from quant-ph) [<a href="/pdf/2503.14473" title="Download PDF" id="pdf-2503.14473" aria-labelledby="pdf-2503.14473">pdf</a>, <a href="https://arxiv.org/html/2503.14473v1" title="View HTML" id="html-2503.14473" aria-labelledby="html-2503.14473" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14473" title="Other formats" id="oth-2503.14473" aria-labelledby="oth-2503.14473">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> EnQode: Fast Amplitude Embedding for Quantum Machine Learning Using Classical Data </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Han,+J">Jason Han</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=DiBrita,+N+S">Nicholas S. DiBrita</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Cho,+Y">Younghyun Cho</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Luo,+H">Hengrui Luo</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Patel,+T">Tirthak Patel</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> EnQode will appear in the Proceedings of the Design Automation Conference (DAC), 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Emerging Technologies (cs.ET); Machine Learning (cs.LG) </div> <p class='mathjax'> Amplitude embedding (AE) is essential in quantum machine learning (QML) for encoding classical data onto quantum circuits. However, conventional AE methods suffer from deep, variable-length circuits that introduce high output error due to extensive gate usage and variable error rates across samples, resulting in noise-driven inconsistencies that degrade model accuracy. We introduce EnQode, a fast AE technique based on symbolic representation that addresses these limitations by clustering dataset samples and solving for cluster mean states through a low-depth, machine-specific ansatz. Optimized to reduce physical gates and SWAP operations, EnQode ensures all samples face consistent, low noise levels by standardizing circuit depth and composition. With over 90% fidelity in data mapping, EnQode enables robust, high-performance QML on noisy intermediate-scale quantum (NISQ) devices. Our open-source solution provides a scalable and efficient alternative for integrating classical data with quantum models. 
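</p> <p class='mathjax'> As background for amplitude embedding, a small numpy sketch of the classical side of the encoding: a sample is padded to a power-of-two length and L2-normalized so it can serve as the amplitude vector of an $n$-qubit state. Synthesizing the low-depth circuit that prepares this state, EnQode's contribution, is not shown. </p> <pre>
# Classical preparation for amplitude embedding: pad and normalize.
import numpy as np

def to_amplitudes(x):
    n_qubits = int(np.ceil(np.log2(len(x))))
    padded = np.zeros(2 ** n_qubits)
    padded[: len(x)] = x
    return padded / np.linalg.norm(padded)   # valid quantum amplitudes

amps = to_amplitudes(np.array([3.0, 1.0, 2.0]))
print(amps, np.sum(amps ** 2))               # unit-norm vector on 2 qubits
</pre> <p class='mathjax'>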
</p> </div> </dd> <dt> <a name='item164'>[164]</a> <a href ="/abs/2503.14492" title="Abstract" id="2503.14492"> arXiv:2503.14492 </a> (cross-list from cs.CV) [<a href="/pdf/2503.14492" title="Download PDF" id="pdf-2503.14492" aria-labelledby="pdf-2503.14492">pdf</a>, <a href="https://arxiv.org/html/2503.14492v1" title="View HTML" id="html-2503.14492" aria-labelledby="html-2503.14492" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14492" title="Other formats" id="oth-2503.14492" aria-labelledby="oth-2503.14492">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Cosmos-Transfer1: Conditional World Generation with Adaptive Multimodal Control </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=NVIDIA">NVIDIA</a>: <a href="https://arxiv.org/search/cs?searchtype=author&query=Alhaija,+H+A">Hassan Abu Alhaija</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Alvarez,+J">Jose Alvarez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bala,+M">Maciej Bala</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cai,+T">Tiffany Cai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+T">Tianshi Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cha,+L">Liz Cha</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Joshua Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+M">Mike Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ferroni,+F">Francesco Ferroni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fidler,+S">Sanja Fidler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fox,+D">Dieter Fox</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ge,+Y">Yunhao Ge</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gu,+J">Jinwei Gu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hassani,+A">Ali Hassani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Isaev,+M">Michael Isaev</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jannaty,+P">Pooya Jannaty</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lan,+S">Shiyi Lan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lasser,+T">Tobias Lasser</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ling,+H">Huan Ling</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+M">Ming-Yu Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+X">Xian Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+Y">Yifan Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+A">Alice Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+Q">Qianli Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mao,+H">Hanzi Mao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ramos,+F">Fabio Ramos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ren,+X">Xuanchi Ren</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shen,+T">Tianchang Shen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+S">Shitao Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+T">Ting-Chun Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+J">Jay Wu</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+J">Jiashu Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+S">Stella Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+K">Kevin Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ye,+Y">Yuchong Ye</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+X">Xiaodong Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+X">Xiaohui Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+Y">Yu Zeng</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Robotics (cs.RO) </div> <p class='mathjax'> We introduce Cosmos-Transfer, a conditional world generation model that can generate world simulations based on multiple spatial control inputs of various modalities such as segmentation, depth, and edge. In the design, the spatial conditional scheme is adaptive and customizable. It allows weighting different conditional inputs differently at different spatial locations. This enables highly controllable world generation and finds use in various world-to-world transfer use cases, including Sim2Real. We conduct extensive evaluations to analyze the proposed model and demonstrate its applications for Physical AI, including robotics Sim2Real and autonomous vehicle data enrichment. We further demonstrate an inference scaling strategy to achieve real-time world generation with an NVIDIA GB200 NVL72 rack. To help accelerate research development in the field, we open-source our models and code at <a href="https://github.com/nvidia-cosmos/cosmos-transfer1" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item165'>[165]</a> <a href ="/abs/2503.14495" title="Abstract" id="2503.14495"> arXiv:2503.14495 </a> (cross-list from cs.CL) [<a href="/pdf/2503.14495" title="Download PDF" id="pdf-2503.14495" aria-labelledby="pdf-2503.14495">pdf</a>, <a href="https://arxiv.org/html/2503.14495v1" title="View HTML" id="html-2503.14495" aria-labelledby="html-2503.14495" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14495" title="Other formats" id="oth-2503.14495" aria-labelledby="oth-2503.14495">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Temporal Consistency for LLM Reasoning Process Error Identification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+J">Jiacheng Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Y">Yue Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qiu,+J">Jiahao Qiu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+K">Kaixuan Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Juan,+X">Xinzhe Juan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+L">Ling Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+M">Mengdi Wang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Verification is crucial for effective mathematical reasoning. 
We present a new temporal consistency method where verifiers iteratively refine their judgments based on the previous assessment. Unlike one-round verification or multi-model debate approaches, our method leverages consistency in a sequence of self-reflection actions to improve verification accuracy. Empirical evaluations across diverse mathematical process error identification benchmarks (Mathcheck, ProcessBench, and PRM800K) show consistent performance improvements over baseline methods. When applied to the recent DeepSeek R1 distilled models, our method demonstrates strong performance, enabling 7B/8B distilled models to outperform all 70B/72B models and GPT-4o on ProcessBench. Notably, the distilled 14B model with our method achieves performance comparable to Deepseek-R1. Our codes are available at <a href="https://github.com/jcguo123/Temporal-Consistency" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item166'>[166]</a> <a href ="/abs/2503.14499" title="Abstract" id="2503.14499"> arXiv:2503.14499 </a> (cross-list from cs.AI) [<a href="/pdf/2503.14499" title="Download PDF" id="pdf-2503.14499" aria-labelledby="pdf-2503.14499">pdf</a>, <a href="https://arxiv.org/html/2503.14499v1" title="View HTML" id="html-2503.14499" aria-labelledby="html-2503.14499" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14499" title="Other formats" id="oth-2503.14499" aria-labelledby="oth-2503.14499">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Measuring AI Ability to Complete Long Tasks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kwa,+T">Thomas Kwa</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=West,+B">Ben West</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Becker,+J">Joel Becker</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+A">Amy Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Garcia,+K">Katharyn Garcia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hasin,+M">Max Hasin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jawhar,+S">Sami Jawhar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kinniment,+M">Megan Kinniment</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rush,+N">Nate Rush</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Von+Arx,+S">Sydney Von Arx</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bloom,+R">Ryan Bloom</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Broadley,+T">Thomas Broadley</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Du,+H">Haoxing Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Goodrich,+B">Brian Goodrich</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jurkovic,+N">Nikola Jurkovic</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Miles,+L+H">Luke Harold Miles</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nix,+S">Seraphina Nix</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+T">Tao Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Parikh,+N">Neev Parikh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rein,+D">David Rein</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sato,+L+J+K">Lucas Jun 
Koba Sato</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wijk,+H">Hjalmar Wijk</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ziegler,+D+M">Daniel M. Ziegler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barnes,+E">Elizabeth Barnes</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chan,+L">Lawrence Chan</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Despite rapid progress on AI benchmarks, the real-world meaning of benchmark performance remains unclear. To quantify the capabilities of AI systems in terms of human capabilities, we propose a new metric: 50%-task-completion time horizon. This is the time humans typically take to complete tasks that AI models can complete with 50% success rate. We first timed humans with relevant domain expertise on a combination of RE-Bench, HCAST, and 66 novel shorter tasks. On these tasks, current frontier AI models such as Claude 3.7 Sonnet have a 50% time horizon of around 50 minutes. Furthermore, frontier AI time horizon has been doubling approximately every seven months since 2019, though the trend may have accelerated in 2024. The increase in AI models' time horizons seems to be primarily driven by greater reliability and ability to adapt to mistakes, combined with better logical reasoning and tool use capabilities. We discuss the limitations of our results -- including their degree of external validity -- and the implications of increased autonomy for dangerous capabilities. If these results generalize to real-world software tasks, extrapolation of this trend predicts that within 5 years, AI systems will be capable of automating many software tasks that currently take humans a month. </p> </div> </dd> <dt> <a name='item167'>[167]</a> <a href ="/abs/2503.14500" title="Abstract" id="2503.14500"> arXiv:2503.14500 </a> (cross-list from cs.CV) [<a href="/pdf/2503.14500" title="Download PDF" id="pdf-2503.14500" aria-labelledby="pdf-2503.14500">pdf</a>, <a href="https://arxiv.org/html/2503.14500v1" title="View HTML" id="html-2503.14500" aria-labelledby="html-2503.14500" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14500" title="Other formats" id="oth-2503.14500" aria-labelledby="oth-2503.14500">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Utilization of Neighbor Information for Image Classification with Different Levels of Supervision </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jayatilaka,+G">Gihan Jayatilaka</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shrivastava,+A">Abhinav Shrivastava</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gwilliam,+M">Matthew Gwilliam</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 18 pages, 16 figures, 7 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> We propose to bridge the gap between semi-supervised and unsupervised image recognition with a flexible method that performs well for both generalized category discovery (GCD) and image clustering. 
Despite the overlap in motivation between these tasks, the methods themselves are restricted to a single task -- GCD methods are reliant on the labeled portion of the data, and deep image clustering methods have no built-in way to leverage the labels efficiently. We connect the two regimes with an innovative approach that Utilizes Neighbor Information for Classification (UNIC) in both the unsupervised (clustering) and semi-supervised (GCD) settings. State-of-the-art clustering methods already rely heavily on nearest neighbors. We improve on their results substantially in two ways: first, with a sampling and cleaning strategy that identifies accurate positive and negative neighbors; and second, by fine-tuning the backbone with clustering losses computed by sampling both types of neighbors. We then adapt this pipeline to GCD by utilizing the labeled images as ground truth neighbors. Our method yields state-of-the-art results for both clustering (+3% ImageNet-100, ImageNet-200) and GCD (+0.8% ImageNet-100, +5% CUB, +2% SCars, +4% Aircraft). </p> </div> </dd> <dt> <a name='item168'>[168]</a> <a href ="/abs/2503.14503" title="Abstract" id="2503.14503"> arXiv:2503.14503 </a> (cross-list from cs.CV) [<a href="/pdf/2503.14503" title="Download PDF" id="pdf-2503.14503" aria-labelledby="pdf-2503.14503">pdf</a>, <a href="https://arxiv.org/html/2503.14503v1" title="View HTML" id="html-2503.14503" aria-labelledby="html-2503.14503" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14503" title="Other formats" id="oth-2503.14503" aria-labelledby="oth-2503.14503">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The Power of Context: How Multimodality Improves Image Super-Resolution </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Mei,+K">Kangfu Mei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Talebi,+H">Hossein Talebi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ardakani,+M">Mojtaba Ardakani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Patel,+V+M">Vishal M. Patel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Milanfar,+P">Peyman Milanfar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Delbracio,+M">Mauricio Delbracio</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> accepted by CVPR2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Single-image super-resolution (SISR) remains challenging due to the inherent difficulty of recovering fine-grained details and preserving perceptual quality from low-resolution inputs. Existing methods often rely on limited image priors, leading to suboptimal results. We propose a novel approach that leverages the rich contextual information available in multiple modalities -- including depth, segmentation, edges, and text prompts -- to learn a powerful generative prior for SISR within a diffusion model framework. We introduce a flexible network architecture that effectively fuses multimodal information, accommodating an arbitrary number of input modalities without requiring significant modifications to the diffusion process.
Crucially, we mitigate hallucinations, often introduced by text prompts, by using spatial information from other modalities to guide regional text-based conditioning. Each modality's guidance strength can also be controlled independently, allowing outputs to be steered in different directions, such as increasing bokeh through depth or adjusting object prominence via segmentation. Extensive experiments demonstrate that our model surpasses state-of-the-art generative SISR methods, achieving superior visual quality and fidelity. See project page at <a href="https://mmsr.kfmei.com/" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item169'>[169]</a> <a href ="/abs/2503.14505" title="Abstract" id="2503.14505"> arXiv:2503.14505 </a> (cross-list from cs.CV) [<a href="/pdf/2503.14505" title="Download PDF" id="pdf-2503.14505" aria-labelledby="pdf-2503.14505">pdf</a>, <a href="https://arxiv.org/html/2503.14505v1" title="View HTML" id="html-2503.14505" aria-labelledby="html-2503.14505" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14505" title="Other formats" id="oth-2503.14505" aria-labelledby="oth-2503.14505">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MusicInfuser: Making Video Diffusion Listen and Dance </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hong,+S">Susung Hong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kemelmacher-Shlizerman,+I">Ira Kemelmacher-Shlizerman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Curless,+B">Brian Curless</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Seitz,+S+M">Steven M. Seitz</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Project page: <a href="https://susunghong.github.io/MusicInfuser" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> We introduce MusicInfuser, an approach for generating high-quality dance videos that are synchronized to a specified music track. Rather than attempting to design and train a new multimodal audio-video model, we show how existing video diffusion models can be adapted to align with musical inputs by introducing lightweight music-video cross-attention and a low-rank adapter. Unlike prior work requiring motion capture data, our approach fine-tunes only on dance videos. MusicInfuser achieves high-quality music-driven video generation while preserving the flexibility and generative capabilities of the underlying models. We introduce an evaluation framework using Video-LLMs to assess multiple dimensions of dance generation quality. The project page and code are available at <a href="https://susunghong.github.io/MusicInfuser" rel="external noopener nofollow" class="link-external link-https">this https URL</a>.
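To make the adaptation recipe concrete, here is a minimal PyTorch sketch pairing a LoRA-style low-rank adapter with a music-to-video cross-attention layer; the class names, dimensions, and scaling below are illustrative assumptions, not MusicInfuser's actual implementation.
<pre><code>
import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    """Frozen pretrained linear layer plus a trainable low-rank update (illustrative)."""
    def __init__(self, base: nn.Linear, rank: int = 8, alpha: float = 16.0):
        super().__init__()
        self.base = base
        for p in self.base.parameters():
            p.requires_grad = False           # keep the pretrained weight frozen
        self.down = nn.Linear(base.in_features, rank, bias=False)
        self.up = nn.Linear(rank, base.out_features, bias=False)
        nn.init.zeros_(self.up.weight)        # adapter starts as a no-op
        self.scale = alpha / rank

    def forward(self, x):
        return self.base(x) + self.scale * self.up(self.down(x))

class MusicVideoCrossAttention(nn.Module):
    """Video tokens attend to audio tokens (hypothetical shapes and layer placement)."""
    def __init__(self, dim: int = 512, n_heads: int = 8):
        super().__init__()
        self.attn = nn.MultiheadAttention(dim, n_heads, batch_first=True)

    def forward(self, video_tokens, audio_tokens):
        out, _ = self.attn(query=video_tokens, key=audio_tokens, value=audio_tokens)
        return video_tokens + out             # residual keeps the base model's behavior reachable

video = torch.randn(2, 16, 512)   # (batch, video tokens, dim)
audio = torch.randn(2, 32, 512)   # (batch, audio tokens, dim)
print(MusicVideoCrossAttention()(video, audio).shape)  # torch.Size([2, 16, 512])
</code></pre>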
</p> </div> </dd> </dl> <dl id='articles'> <h3>Replacement submissions (showing 122 of 122 entries)</h3> <dt> <a name='item170'>[170]</a> <a href ="/abs/2010.02990" title="Abstract" id="2010.02990"> arXiv:2010.02990 </a> (replaced) [<a href="/pdf/2010.02990" title="Download PDF" id="pdf-2010.02990" aria-labelledby="pdf-2010.02990">pdf</a>, <a href="https://arxiv.org/html/2010.02990v5" title="View HTML" id="html-2010.02990" aria-labelledby="html-2010.02990" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2010.02990" title="Other formats" id="oth-2010.02990" aria-labelledby="oth-2010.02990">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On The Convergence of Euler Discretization of Finite-Time Convergent Gradient Flows </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+S">Siqi Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Benosman,+M">Mouhacine Benosman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Romero,+O">Orlando Romero</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Systems and Control (eess.SY) </div> <p class='mathjax'> In this study, we investigate the performance of two novel first-order optimization algorithms, namely the rescaled-gradient flow (RGF) and the signed-gradient flow (SGF). These algorithms are derived from the forward Euler discretization of finite-time convergent flows, composed of non-Lipschitz dynamical systems, which locally converge to the minima of gradient-dominated functions. We first characterize the closeness between the continuous flows and their discretizations, and then present (linear) convergence guarantees for the discrete algorithms (in the general and the stochastic case). Furthermore, in cases where problem parameters remain unknown or exhibit non-uniformity, we integrate a line-search strategy with RGF/SGF and provide convergence analysis in this setting. We then apply the proposed algorithms to academic examples and deep neural network training; the results show that our schemes converge faster than standard optimization alternatives.
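For intuition, a minimal NumPy sketch of forward Euler steps on the two flows, assuming a toy quadratic objective and one common rescaling exponent; the paper's exact exponents and step sizes may differ.
<pre><code>
import numpy as np

def grad_f(x):                      # gradient of f(x) = 0.5 * ||x||^2 (toy objective)
    return x

def sgf_step(x, h=0.05):
    """Forward Euler on the signed-gradient flow  x' = -sign(grad f(x))."""
    return x - h * np.sign(grad_f(x))

def rgf_step(x, h=0.05, p=3.0, eps=1e-12):
    """Forward Euler on a rescaled-gradient flow  x' = -grad f / ||grad f||^{(p-2)/(p-1)}
    (one common rescaling; both flows are non-Lipschitz at the minimizer)."""
    g = grad_f(x)
    return x - h * g / (np.linalg.norm(g) ** ((p - 2.0) / (p - 1.0)) + eps)

x = np.array([1.0, -2.0])
for _ in range(200):
    x = sgf_step(x)
print(np.round(x, 2))  # near the minimizer [0, 0], up to O(h) chatter from the sign field
</code></pre>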
</p> </div> </dd> <dt> <a name='item171'>[171]</a> <a href ="/abs/2301.12351" title="Abstract" id="2301.12351"> arXiv:2301.12351 </a> (replaced) [<a href="/pdf/2301.12351" title="Download PDF" id="pdf-2301.12351" aria-labelledby="pdf-2301.12351">pdf</a>, <a href="https://arxiv.org/html/2301.12351v4" title="View HTML" id="html-2301.12351" aria-labelledby="html-2301.12351" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2301.12351" title="Other formats" id="oth-2301.12351" aria-labelledby="oth-2301.12351">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Emerging Synergies in Causality and Deep Generative Models: A Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+G">Guanglin Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+S">Shaoan Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hao,+G">Guang-Yuan Hao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+S">Shiming Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+B">Biwei Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+X">Xiwei Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+C">Chen Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+L">Liming Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yao,+L">Lina Yao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+K">Kun Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> In the field of artificial intelligence (AI), the quest to understand and model data-generating processes (DGPs) is of paramount importance. Deep generative models (DGMs) have proven adept in capturing complex data distributions but often fall short in generalization and interpretability. On the other hand, causality offers a structured lens to comprehend the mechanisms driving data generation and highlights the causal-effect dynamics inherent in these processes. While causality excels in interpretability and the ability to extrapolate, it grapples with intricacies of high-dimensional spaces. Recognizing the synergistic potential, we delve into the confluence of causality and DGMs. We elucidate the integration of causal principles within DGMs, investigate causal identification using DGMs, and navigate an emerging research frontier of causality in large-scale generative models, particularly generative large language models (LLMs). We offer insights into methodologies, highlight open challenges, and suggest future directions, positioning our comprehensive review as an essential guide in this swiftly emerging and evolving area. 
</p> </div> </dd> <dt> <a name='item172'>[172]</a> <a href ="/abs/2305.07612" title="Abstract" id="2305.07612"> arXiv:2305.07612 </a> (replaced) [<a href="/pdf/2305.07612" title="Download PDF" id="pdf-2305.07612" aria-labelledby="pdf-2305.07612">pdf</a>, <a href="https://arxiv.org/html/2305.07612v2" title="View HTML" id="html-2305.07612" aria-labelledby="html-2305.07612" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2305.07612" title="Other formats" id="oth-2305.07612" aria-labelledby="oth-2305.07612">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Lower Bounds and Accelerated Algorithms in Distributed Stochastic Optimization with Communication Compression </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=He,+Y">Yutong He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+X">Xinmeng Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yiming Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+W">Wotao Yin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yuan,+K">Kun Yuan</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Distributed, Parallel, and Cluster Computing (cs.DC); Optimization and Control (math.OC) </div> <p class='mathjax'> Communication compression is an essential strategy for alleviating communication overhead by reducing the volume of information exchanged between computing nodes in large-scale distributed stochastic optimization. Although numerous algorithms with convergence guarantees have been obtained, the optimal performance limit under communication compression remains unclear. <br>In this paper, we investigate the performance limit of distributed stochastic optimization algorithms employing communication compression. We focus on two main types of compressors, unbiased and contractive, and address the best-possible convergence rates one can obtain with these compressors. We establish the lower bounds for the convergence rates of distributed stochastic optimization in six different settings, combining strongly-convex, generally-convex, or non-convex functions with unbiased or contractive compressor types. To bridge the gap between lower bounds and existing algorithms' rates, we propose NEOLITHIC, a nearly optimal algorithm with compression that achieves the established lower bounds up to logarithmic factors under mild conditions. Extensive experimental results support our theoretical findings. This work provides insights into the theoretical limitations of existing compressors and motivates further research into fundamentally new compressor properties. 
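To illustrate the two compressor classes the analysis covers, here is a minimal NumPy sketch of an unbiased random sparsifier and a contractive top-k compressor; this is illustrative only, and NEOLITHIC itself is not reproduced.
<pre><code>
import numpy as np

rng = np.random.default_rng(0)

def rand_k(x, k):
    """Unbiased compressor: keep k random coordinates, rescale by d/k so E[C(x)] = x."""
    d = x.size
    idx = rng.choice(d, size=k, replace=False)
    out = np.zeros_like(x)
    out[idx] = x[idx] * (d / k)
    return out

def top_k(x, k):
    """Contractive compressor: keep the k largest-magnitude coordinates;
    satisfies ||C(x) - x||^2 <= (1 - k/d) ||x||^2."""
    out = np.zeros_like(x)
    idx = np.argpartition(np.abs(x), -k)[-k:]
    out[idx] = x[idx]
    return out

x = rng.normal(size=1000)
avg = np.mean([rand_k(x, 100) for _ in range(5000)], axis=0)
print("mean abs deviation from x:", np.abs(avg - x).mean())   # small: unbiased on average
print(np.linalg.norm(top_k(x, 100) - x) <= np.sqrt(1 - 0.1) * np.linalg.norm(x))  # True
</code></pre>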
</p> </div> </dd> <dt> <a name='item173'>[173]</a> <a href ="/abs/2305.15598" title="Abstract" id="2305.15598"> arXiv:2305.15598 </a> (replaced) [<a href="/pdf/2305.15598" title="Download PDF" id="pdf-2305.15598" aria-labelledby="pdf-2305.15598">pdf</a>, <a href="https://arxiv.org/html/2305.15598v4" title="View HTML" id="html-2305.15598" aria-labelledby="html-2305.15598" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2305.15598" title="Other formats" id="oth-2305.15598" aria-labelledby="oth-2305.15598">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ReLU Neural Networks with Linear Layers are Biased Towards Single- and Multi-Index Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Parkinson,+S">Suzanna Parkinson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ongie,+G">Greg Ongie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Willett,+R">Rebecca Willett</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> Neural networks often operate in the overparameterized regime, in which there are far more parameters than training samples, allowing the training data to be fit perfectly. That is, training the network effectively learns an interpolating function, and properties of the interpolant affect predictions the network will make on new samples. This manuscript explores the properties of such interpolating functions learned by neural networks of depth greater than two. Our framework considers a family of networks of varying depths that all have the same capacity but different representation costs. The representation cost of a function induced by a neural network architecture is the minimum sum of squared weights needed for the network to represent the function; it reflects the function space bias associated with the architecture. Our results show that adding additional linear layers to the input side of a shallow ReLU network yields a representation cost favoring functions with low mixed variation -- that is, functions that have limited variation in directions orthogonal to a low-dimensional subspace and can be well approximated by a single- or multi-index model. This bias occurs because minimizing the sum of squared weights of the linear layers is equivalent to minimizing a low-rank promoting Schatten quasi-norm of a single "virtual" weight matrix. Our experiments confirm this behavior in standard network training regimes. They additionally show that linear layers can improve generalization and the learned network is well-aligned with the true latent low-dimensional linear subspace when data is generated using a multi-index model.
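The low-rank mechanism can be checked numerically in the simplest two-factor case, where the minimum sum of squared Frobenius norms over factorizations W = AB equals twice the nuclear norm of W; the balanced SVD factorization below attains that minimum. A NumPy sketch:
<pre><code>
import numpy as np

rng = np.random.default_rng(1)
W = rng.normal(size=(5, 4))

# Nuclear norm of W (sum of singular values) -- the Schatten quasi-norm for two factors.
U, s, Vt = np.linalg.svd(W, full_matrices=False)
nuclear = s.sum()

# Balanced factorization W = A @ B attaining  min ||A||_F^2 + ||B||_F^2 = 2 ||W||_*
A = U * np.sqrt(s)           # U @ diag(sqrt(s))
B = (Vt.T * np.sqrt(s)).T    # diag(sqrt(s)) @ Vt
assert np.allclose(A @ B, W)
print(np.linalg.norm(A)**2 + np.linalg.norm(B)**2, 2 * nuclear)  # equal up to float error
</code></pre>
With L linear factors the same argument yields a Schatten-type quasi-norm with a smaller exponent, which promotes low rank even more strongly.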
</p> </div> </dd> <dt> <a name='item174'>[174]</a> <a href ="/abs/2310.04264" title="Abstract" id="2310.04264"> arXiv:2310.04264 </a> (replaced) [<a href="/pdf/2310.04264" title="Download PDF" id="pdf-2310.04264" aria-labelledby="pdf-2310.04264">pdf</a>, <a href="https://arxiv.org/html/2310.04264v5" title="View HTML" id="html-2310.04264" aria-labelledby="html-2310.04264" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2310.04264" title="Other formats" id="oth-2310.04264" aria-labelledby="oth-2310.04264">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Deep learning modelling of manufacturing and build variations on multi-stage axial compressors aerodynamics </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bruni,+G">Giuseppe Bruni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Maleki,+S">Sepehr Maleki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Krishnababu,+S+K">Senthil K. Krishnababu</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Data-Centric Engineering, vol. 6, p. e9, 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computational Engineering, Finance, and Science (cs.CE); Fluid Dynamics (physics.flu-dyn) </div> <p class='mathjax'> Applications of deep learning to physical simulations such as Computational Fluid Dynamics have recently experienced a surge in interest, and their viability has been demonstrated in different domains. However, due to the highly complex, turbulent, and three-dimensional flows, they have not yet been proven usable for turbomachinery applications. Multistage axial compressors for gas turbine applications represent a remarkably challenging case, due to the high-dimensionality of the regression of the flow field from geometrical and operational variables. This paper demonstrates the development and application of a deep learning framework for predictions of the flow field and aerodynamic performance of multistage axial compressors. A physics-based dimensionality reduction approach unlocks the potential for flow-field predictions, as it re-formulates the regression problem from an unstructured to a structured one, as well as reducing the number of degrees of freedom. Compared to traditional "black-box" surrogate models, it provides explainability to the predictions of the overall performance by identifying the corresponding aerodynamic drivers. The model is applied to manufacturing and build variations, as the associated performance scatter is known to have a significant impact on $CO_2$ emissions, which poses a challenge of great industrial and environmental relevance. The proposed architecture is proven to achieve an accuracy comparable to that of the CFD benchmark, in real-time, for an industrially relevant application. The deployed model is readily integrated within the manufacturing and build process of gas turbines, thus providing the opportunity to analytically assess the impact on performance with actionable and explainable data. 
</p> </div> </dd> <dt> <a name='item175'>[175]</a> <a href ="/abs/2310.17042" title="Abstract" id="2310.17042"> arXiv:2310.17042 </a> (replaced) [<a href="/pdf/2310.17042" title="Download PDF" id="pdf-2310.17042" aria-labelledby="pdf-2310.17042">pdf</a>, <a href="https://arxiv.org/html/2310.17042v4" title="View HTML" id="html-2310.17042" aria-labelledby="html-2310.17042" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2310.17042" title="Other formats" id="oth-2310.17042" aria-labelledby="oth-2310.17042">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Stochastic Gradient Sampling for Enhancing Neural Networks Training </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yun,+J">Juyoung Yun</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV); Neural and Evolutionary Computing (cs.NE) </div> <p class='mathjax'> In this paper, we introduce StochGradAdam, a novel optimizer designed as an extension of the Adam algorithm, incorporating stochastic gradient sampling techniques to improve computational efficiency while maintaining robust performance. StochGradAdam optimizes by selectively sampling a subset of gradients during training, reducing the computational cost while preserving the advantages of adaptive learning rates and bias corrections found in Adam. Our experimental results on image classification and segmentation tasks demonstrate that StochGradAdam can achieve comparable or superior performance to Adam, even when using fewer gradient updates per iteration. By focusing on key gradient updates, StochGradAdam offers stable convergence and enhanced exploration of the loss landscape, while mitigating the impact of noisy gradients. The results suggest that this approach is particularly effective for large-scale models and datasets, providing a promising alternative to traditional optimization techniques for deep learning applications.
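A minimal NumPy sketch of the idea, masking a random subset of gradient coordinates inside an otherwise standard Adam step; the uniform sampling rule shown here is an assumption, as the paper's exact selection strategy may differ.
<pre><code>
import numpy as np

def stochgrad_adam_step(w, g, m, v, t, lr=1e-3, beta1=0.9, beta2=0.999,
                        eps=1e-8, sample_rate=0.5, rng=np.random.default_rng()):
    """One Adam step applied to a random subset of gradient coordinates
    (a sketch of gradient sampling, not the paper's exact rule)."""
    mask = rng.random(g.shape) < sample_rate   # keep roughly sample_rate of the entries
    g = np.where(mask, g, 0.0)                 # unsampled coordinates contribute no update
    m = beta1 * m + (1 - beta1) * g
    v = beta2 * v + (1 - beta2) * g**2
    m_hat = m / (1 - beta1**t)                 # bias correction, as in Adam
    v_hat = v / (1 - beta2**t)
    w = w - lr * m_hat / (np.sqrt(v_hat) + eps)
    return w, m, v

# Usage on a toy quadratic f(w) = 0.5 ||w||^2, whose gradient is w itself:
w = np.ones(4); m = np.zeros(4); v = np.zeros(4)
for t in range(1, 2001):
    w, m, v = stochgrad_adam_step(w, w, m, v, t, lr=1e-2)
print(np.round(w, 3))  # approaches the minimizer despite sampled updates
</code></pre>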
</p> </div> </dd> <dt> <a name='item176'>[176]</a> <a href ="/abs/2402.13765" title="Abstract" id="2402.13765"> arXiv:2402.13765 </a> (replaced) [<a href="/pdf/2402.13765" title="Download PDF" id="pdf-2402.13765" aria-labelledby="pdf-2402.13765">pdf</a>, <a href="https://arxiv.org/html/2402.13765v2" title="View HTML" id="html-2402.13765" aria-labelledby="html-2402.13765" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.13765" title="Other formats" id="oth-2402.13765" aria-labelledby="oth-2402.13765">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Accuracy-Preserving Calibration via Statistical Modeling on Probability Simplex </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Esaki,+Y">Yasushi Esaki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nakamura,+A">Akihiro Nakamura</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kawano,+K">Keisuke Kawano</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tokuhisa,+R">Ryoko Tokuhisa</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kutsuna,+T">Takuro Kutsuna</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at the 27th International Conference on Artificial Intelligence and Statistics (AISTATS) 2024. The code is available at <a href="https://github.com/ToyotaCRDL/SimplexTS" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Proceedings of The 27th International Conference on Artificial Intelligence and Statistics, PMLR 238:1666-1674, 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> Classification models based on deep neural networks (DNNs) must be calibrated to measure the reliability of predictions. Some recent calibration methods have employed a probabilistic model on the probability simplex. However, these calibration methods cannot preserve the accuracy of pre-trained models, even those with a high classification accuracy. We propose an accuracy-preserving calibration method using the Concrete distribution as the probabilistic model on the probability simplex. We theoretically prove that a DNN model trained on cross-entropy loss has optimality as the parameter of the Concrete distribution. We also propose an efficient method that synthetically generates samples for training probabilistic models on the probability simplex. We demonstrate that the proposed method can outperform previous methods in accuracy-preserving calibration tasks using benchmarks. The code is available at <a href="https://github.com/ToyotaCRDL/SimplexTS" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
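For reference, a minimal NumPy sampler for the Concrete (Gumbel-softmax) distribution, the probabilistic model on the probability simplex that the method builds on; the logits and temperature below are illustrative.
<pre><code>
import numpy as np

def sample_concrete(logits, temperature=0.5, rng=np.random.default_rng(0)):
    """Sample from a Concrete (Gumbel-softmax) distribution on the simplex:
    softmax((logits + Gumbel noise) / temperature)."""
    gumbel = -np.log(-np.log(rng.random(logits.shape)))
    z = (logits + gumbel) / temperature
    z = z - z.max()                       # numerical stability
    p = np.exp(z)
    return p / p.sum()

sample = sample_concrete(np.log(np.array([0.7, 0.2, 0.1])))
print(sample, sample.sum())  # a point on the probability simplex, summing to 1
# Low temperatures concentrate mass near a vertex (approaching a one-hot categorical
# sample); higher temperatures yield smoother points in the interior of the simplex.
</code></pre>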
</p> </div> </dd> <dt> <a name='item177'>[177]</a> <a href ="/abs/2402.16562" title="Abstract" id="2402.16562"> arXiv:2402.16562 </a> (replaced) [<a href="/pdf/2402.16562" title="Download PDF" id="pdf-2402.16562" aria-labelledby="pdf-2402.16562">pdf</a>, <a href="https://arxiv.org/html/2402.16562v4" title="View HTML" id="html-2402.16562" aria-labelledby="html-2402.16562" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2402.16562" title="Other formats" id="oth-2402.16562" aria-labelledby="oth-2402.16562">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> QF-tuner: Breaking Tradition in Reinforcement Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jumaah,+M+A">Mahmood A. Jumaah</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ali,+Y+H">Yossra H. Ali</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rashid,+T+A">Tarik A. Rashid</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Neural and Evolutionary Computing (cs.NE) </div> <p class='mathjax'> In reinforcement learning, hyperparameter tuning refers to choosing parameter values that improve overall performance. Manual or random hyperparameter tuning can lead to inconsistent results across reinforcement learning algorithms. In this paper, we propose a new method called QF-tuner for automatic hyperparameter tuning in the Q-learning algorithm using the FOX optimization algorithm (FOX). Furthermore, a new objective function has been employed within FOX that prioritizes reward over learning error and time. QF-tuner starts by running FOX and, at each iteration, minimizes a fitness value derived from observations gathered by executing the Q-learning algorithm. The proposed method has been evaluated using two control tasks from the OpenAI Gym: CartPole and FrozenLake. The empirical results indicate that QF-tuner outperforms other optimization algorithms, such as particle swarm optimization (PSO), the bees algorithm (BA), genetic algorithms (GA), and random search. Specifically, on the FrozenLake task, QF-tuner increased rewards by 36% and reduced learning time by 26%, while on the CartPole task, it increased rewards by 57% and reduced learning time by 20%. Thus, QF-tuner is an effective method for hyperparameter tuning in Q-learning, enabling better solutions to control tasks.
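A minimal sketch of the outer/inner loop structure, with random search standing in for the FOX optimizer and a toy chain MDP standing in for CartPole/FrozenLake; every component here is a hypothetical stand-in.
<pre><code>
import numpy as np

rng = np.random.default_rng(0)

def run_q_learning(alpha, gamma, episodes=200):
    """Tiny 5-state chain MDP; returns mean episode reward (toy inner loop)."""
    n, Q, total = 5, np.zeros((5, 2)), 0.0
    for _ in range(episodes):
        s = 0
        for _ in range(20):
            a = rng.integers(2) if rng.random() < 0.1 else int(Q[s].argmax())
            s2 = min(s + 1, n - 1) if a == 1 else max(s - 1, 0)
            r = 1.0 if s2 == n - 1 else 0.0
            Q[s, a] += alpha * (r + gamma * Q[s2].max() - Q[s, a])  # Q-learning update
            s, total = s2, total + r
    return total / episodes

def fitness(params):
    """QF-tuner-style objective prioritizing reward (negated for minimization)."""
    alpha, gamma = params
    return -run_q_learning(alpha, gamma)

# Outer loop: random search stands in for the FOX optimizer used by QF-tuner.
best = min((rng.uniform([0.01, 0.5], [1.0, 0.999]) for _ in range(30)), key=fitness)
print("best (alpha, gamma):", np.round(best, 3))
</code></pre>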
</p> </div> </dd> <dt> <a name='item178'>[178]</a> <a href ="/abs/2403.03362" title="Abstract" id="2403.03362"> arXiv:2403.03362 </a> (replaced) [<a href="/pdf/2403.03362" title="Download PDF" id="pdf-2403.03362" aria-labelledby="pdf-2403.03362">pdf</a>, <a href="https://arxiv.org/html/2403.03362v2" title="View HTML" id="html-2403.03362" aria-labelledby="html-2403.03362" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.03362" title="Other formats" id="oth-2403.03362" aria-labelledby="oth-2403.03362">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Level Set Teleportation: An Optimization Perspective </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Mishkin,+A">Aaron Mishkin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bietti,+A">Alberto Bietti</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gower,+R+M">Robert M. Gower</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published at AISTATS 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Optimization and Control (math.OC) </div> <p class='mathjax'> We study level set teleportation, an optimization routine that attempts to accelerate gradient descent (GD) by maximizing the gradient norm over a level set of the objective. While teleportation intuitively speeds up GD via bigger steps, current work lacks convergence theory for convex functions, guarantees for solving the teleportation operator, and even clear empirical evidence showing this acceleration. We resolve these open questions. For convex functions satisfying Hessian stability, we prove that GD with teleportation obtains a combined sub-linear/linear convergence rate which is strictly faster than GD when the optimality gap is small. This is in sharp contrast to the standard (strongly) convex setting, where teleportation neither improves nor worsens convergence. To evaluate teleportation in practice, we develop a projected-gradient method requiring only Hessian-vector products. We use this to show that gradient methods with access to a teleportation oracle outperform their standard versions on a variety of problems. We also find that GD with teleportation is faster than truncated Newton methods, particularly for non-convex optimization.
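A minimal NumPy sketch of teleportation on a quadratic: ascend on the squared gradient norm using Hessian-vector products, then project back onto the level set. The exact rescaling projection below exploits the 2-homogeneity of the toy objective; the paper's projected-gradient method is more general.
<pre><code>
import numpy as np

rng = np.random.default_rng(0)
A = np.diag([10.0, 1.0, 0.1])             # ill-conditioned quadratic f(x) = 0.5 x^T A x

f = lambda x: 0.5 * x @ A @ x
grad = lambda x: A @ x

def teleport(x0, steps=100, lr=0.05):
    """Maximize ||grad f||^2 over the level set {f = f(x0)}: gradient ascent
    plus an exact projection (rescaling works because f(t x) = t^2 f(x))."""
    c = f(x0)
    x = x0.copy()
    for _ in range(steps):
        x = x + lr * 2 * A @ grad(x)      # ascent on ||grad f||^2 via Hessian-vector product
        x = x * np.sqrt(c / f(x))         # project back onto the level set
    return x

x0 = rng.normal(size=3)
x1 = teleport(x0)
print(f(x0), f(x1))                                         # same level set, up to float error
print(np.linalg.norm(grad(x0)), np.linalg.norm(grad(x1)))   # larger gradient norm after teleport
</code></pre>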
</p> </div> </dd> <dt> <a name='item179'>[179]</a> <a href ="/abs/2404.07696" title="Abstract" id="2404.07696"> arXiv:2404.07696 </a> (replaced) [<a href="/pdf/2404.07696" title="Download PDF" id="pdf-2404.07696" aria-labelledby="pdf-2404.07696">pdf</a>, <a href="https://arxiv.org/html/2404.07696v2" title="View HTML" id="html-2404.07696" aria-labelledby="html-2404.07696" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.07696" title="Other formats" id="oth-2404.07696" aria-labelledby="oth-2404.07696">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Flatness Improves Backbone Generalisation in Few-shot Classification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+R">Rui Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Trapp,+M">Martin Trapp</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Klasson,+M">Marcus Klasson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Solin,+A">Arno Solin</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Deployment of deep neural networks in real-world settings typically requires adaptation to new tasks with few examples. Few-shot classification (FSC) provides a solution to this problem by leveraging pre-trained backbones for fast adaptation to new classes. However, approaches for multi-domain FSC typically result in complex pipelines aimed at information fusion and task-specific adaptation without consideration of the importance of backbone training. In this work, we introduce an effective strategy for backbone training and selection in multi-domain FSC by utilizing flatness-aware training and fine-tuning. Our work is theoretically grounded and empirically performs on par with or better than state-of-the-art methods despite being simpler. Further, our results indicate that backbone training is crucial for good generalisation in FSC across different adaptation methods.
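Flatness-aware training is commonly instantiated with sharpness-aware minimization (SAM); whether this paper uses exactly SAM is not asserted here, but a minimal NumPy sketch of one such update illustrates the mechanism.
<pre><code>
import numpy as np

def sam_step(w, grad_fn, lr=0.005, rho=0.05):
    """One sharpness-aware minimization (SAM) step, a common flatness-aware update:
    take the gradient at the worst-case nearby point w + rho * g / ||g||."""
    g = grad_fn(w)
    eps = rho * g / (np.linalg.norm(g) + 1e-12)   # ascent-direction perturbation
    g_sharp = grad_fn(w + eps)                    # gradient at the perturbed point
    return w - lr * g_sharp                       # apply it to the original weights

# Toy quadratic with one sharp direction (w0) and one flat direction (w1):
loss_grad = lambda w: np.array([2 * 50.0 * w[0], 2 * 0.5 * w[1]])  # grad of 50 w0^2 + 0.5 w1^2
w = np.array([1.0, 1.0])
for _ in range(200):
    w = sam_step(w, loss_grad)
print(np.round(w, 3))  # the update descends while always evaluating the sharper nearby point
</code></pre>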
</p> </div> </dd> <dt> <a name='item180'>[180]</a> <a href ="/abs/2405.14099" title="Abstract" id="2405.14099"> arXiv:2405.14099 </a> (replaced) [<a href="/pdf/2405.14099" title="Download PDF" id="pdf-2405.14099" aria-labelledby="pdf-2405.14099">pdf</a>, <a href="https://arxiv.org/html/2405.14099v4" title="View HTML" id="html-2405.14099" aria-labelledby="html-2405.14099" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.14099" title="Other formats" id="oth-2405.14099" aria-labelledby="oth-2405.14099">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Automatic Differentiation is Essential in Training Neural Networks for Solving Differential Equations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+C">Chuqi Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Y">Yahong Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiang,+Y">Yang Xiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hao,+W">Wenrui Hao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Numerical Analysis (math.NA) </div> <p class='mathjax'> Neural network-based approaches have recently shown significant promise in solving partial differential equations (PDEs) in science and engineering, especially in scenarios featuring complex domains or incorporation of empirical data. One advantage of neural network methods for PDEs lies in their use of automatic differentiation (AD), which requires only the sample points themselves, unlike traditional finite difference (FD) approximations that need nearby local points to compute derivatives. In this paper, we quantitatively demonstrate the advantage of AD in training neural networks. The concept of truncated entropy is introduced to characterize the training behavior. Specifically, through comprehensive experimental and theoretical analyses conducted on random feature models and two-layer neural networks, we discover that the defined truncated entropy serves as a reliable metric for quantifying the residual loss of random feature models and the training speed of neural networks for both AD and FD methods. Our experimental and theoretical analyses demonstrate that, from a training perspective, AD outperforms FD in solving PDEs.
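The contrast is easy to see in code: a minimal PyTorch sketch comparing the exact autograd derivative at the sample points with a central finite difference that needs nearby points; the closed-form function u below is an illustrative stand-in for a network output.
<pre><code>
import torch

# u(x): a small stand-in for a network output, in closed form for clarity.
u = lambda x: torch.sin(3.0 * x)

# Automatic differentiation: exact derivative at the sample points themselves.
x = torch.tensor([0.3, 0.7], requires_grad=True)
du_ad, = torch.autograd.grad(u(x).sum(), x)

# Finite differences: require nearby points and introduce truncation error.
h = 1e-2
du_fd = (u(x.detach() + h) - u(x.detach() - h)) / (2 * h)

print(du_ad)                    # 3 cos(3x), exact up to float precision
print((du_fd - du_ad).abs())    # O(h^2) truncation error of the central difference
</code></pre>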
</p> </div> </dd> <dt> <a name='item181'>[181]</a> <a href ="/abs/2405.15304" title="Abstract" id="2405.15304"> arXiv:2405.15304 </a> (replaced) [<a href="/pdf/2405.15304" title="Download PDF" id="pdf-2405.15304" aria-labelledby="pdf-2405.15304">pdf</a>, <a href="https://arxiv.org/html/2405.15304v3" title="View HTML" id="html-2405.15304" aria-labelledby="html-2405.15304" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.15304" title="Other formats" id="oth-2405.15304" aria-labelledby="oth-2405.15304">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unlearning Concepts in Diffusion Model via Concept Domain Correction and Concept Preserving Gradient </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Y">Yongliang Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+S">Shiji Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+M">Mingzhuo Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+L">Lianzhe Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+H">Heng Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+W">Wenbo Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+X">Xinting Hu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+X">Xiao Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+X">Xu Yang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> AAAI 2025 camera-ready version </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Text-to-image diffusion models have achieved remarkable success in generating photorealistic images. However, the inclusion of sensitive information during pre-training poses significant risks. Machine Unlearning (MU) offers a promising solution to eliminate sensitive concepts from these models. Despite its potential, existing MU methods face two main challenges: 1) limited generalization, where concept erasure is effective only within the unlearned set, failing to prevent sensitive concept generation from out-of-set prompts; and 2) utility degradation, where removing target concepts significantly impacts the model's overall performance. To address these issues, we propose a novel concept domain correction framework named \textbf{DoCo} (\textbf{Do}main \textbf{Co}rrection). By aligning the output domains of sensitive and anchor concepts through adversarial training, our approach ensures comprehensive unlearning of target concepts. Additionally, we introduce a concept-preserving gradient surgery technique that mitigates conflicting gradient components, thereby preserving the model's utility while unlearning specific concepts. Extensive experiments across various instances, styles, and offensive concepts demonstrate the effectiveness of our method in unlearning targeted concepts with minimal impact on related concepts, outperforming previous approaches even for out-of-distribution prompts. 
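A minimal NumPy sketch of one common form of gradient surgery, projecting out the component of the unlearning gradient that conflicts with a concept-preservation gradient; DoCo's exact rule may differ from this PCGrad-style projection.
<pre><code>
import numpy as np

def surgery(g_unlearn, g_preserve):
    """If the unlearning gradient conflicts with the concept-preservation gradient
    (negative dot product), remove its component along the preservation direction."""
    dot = g_unlearn @ g_preserve
    if dot < 0:
        g_unlearn = g_unlearn - dot / (g_preserve @ g_preserve) * g_preserve
    return g_unlearn

g_u = np.array([1.0, -2.0])   # pushes to erase the target concept
g_p = np.array([1.0, 1.0])    # protects related concepts
g = surgery(g_u, g_p)
print(g, g @ g_p)             # surviving update no longer opposes preservation (dot >= 0)
</code></pre>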
</p> </div> </dd> <dt> <a name='item182'>[182]</a> <a href ="/abs/2405.17465" title="Abstract" id="2405.17465"> arXiv:2405.17465 </a> (replaced) [<a href="/pdf/2405.17465" title="Download PDF" id="pdf-2405.17465" aria-labelledby="pdf-2405.17465">pdf</a>, <a href="https://arxiv.org/html/2405.17465v2" title="View HTML" id="html-2405.17465" aria-labelledby="html-2405.17465" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.17465" title="Other formats" id="oth-2405.17465" aria-labelledby="oth-2405.17465">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Information Fusion in Smart Agriculture: Machine Learning Applications and Future Research Directions </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Katharria,+A">Aashu Katharria</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rajwar,+K">Kanchan Rajwar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pant,+M">Millie Pant</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vel%C3%A1squez,+J+D">Juan D. Velásquez</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sn%C3%A1%C5%A1el,+V">Václav Snášel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deep,+K">Kusum Deep</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Machine learning (ML) is a rapidly evolving technology with expanding applications across various fields. This paper presents a comprehensive survey of recent ML applications in agriculture for sustainability and efficiency. Existing reviews mainly focus on narrow subdomains or lack a fusion-driven perspective. This study provides a combined analysis of ML applications in agriculture, structured around five key objectives: (i) Analyzing ML techniques across pre-harvesting, harvesting, and post-harvesting phases. (ii) Demonstrating how ML can be used with agricultural data and data fusion. (iii) Conducting a bibliometric and statistical analysis to reveal research trends and activity. (iv) Investigating real-world case studies of leading artificial intelligence (AI)-driven agricultural companies that use different types of multisensors and multisource data. (v) Compiling publicly available datasets to support ML model training. Going beyond previous reviews, this one focuses on how ML techniques, combined with multi-source data fusion (integrating remote sensing, IoT, and climate analytics), enhance precision agriculture by improving predictive accuracy and decision-making. Case studies and statistical insights illustrate the evolving landscape of AI-driven smart farming, while the discussion of future research directions addresses challenges associated with data fusion for heterogeneous datasets. This review bridges the gap between AI research and agricultural applications, offering a roadmap for researchers, industry professionals, and policymakers to harness information fusion and ML for advancing precision agriculture.
</p> </div> </dd> <dt> <a name='item183'>[183]</a> <a href ="/abs/2406.06652" title="Abstract" id="2406.06652"> arXiv:2406.06652 </a> (replaced) [<a href="/pdf/2406.06652" title="Download PDF" id="pdf-2406.06652" aria-labelledby="pdf-2406.06652">pdf</a>, <a href="https://arxiv.org/html/2406.06652v3" title="View HTML" id="html-2406.06652" aria-labelledby="html-2406.06652" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.06652" title="Other formats" id="oth-2406.06652" aria-labelledby="oth-2406.06652">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Improving Generalization of Neural Vehicle Routing Problem Solvers Through the Lens of Model Architecture </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xiao,+Y">Yubin Xiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+D">Di Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+X">Xuan Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Y">Yuesong Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+B">Boyang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Du,+W">Wei Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+L">Liupu Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+Y">You Zhou</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This work has been accepted by Neural Networks </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Neural models produce promising results when solving Vehicle Routing Problems (VRPs), but often fall short in generalization. Recent attempts to enhance model generalization often incur unnecessarily large training cost or cannot be directly applied to other models solving different VRP variants. To address these issues, we take a novel perspective on model architecture in this study. Specifically, we propose a plug-and-play Entropy-based Scaling Factor (ESF) and a Distribution-Specific (DS) decoder to enhance the size and distribution generalization, respectively. ESF adjusts the attention weight pattern of the model towards familiar ones discovered during training when solving VRPs of varying sizes. The DS decoder explicitly models VRPs of multiple training distribution patterns through multiple auxiliary light decoders, expanding the model representation space to encompass a broader range of distributional scenarios. We conduct extensive experiments on both synthetic and widely recognized real-world benchmarking datasets and compare the performance with seven baseline models. The results demonstrate the effectiveness of using ESF and DS decoder to obtain a more generalizable model and showcase their applicability to solve different VRP variants, i.e., travelling salesman problem and capacitated VRP. Notably, our proposed generic components require minimal computational resources, and can be effortlessly integrated into conventional generalization strategies to further elevate model generalization. 
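As a rough illustration of an entropy-motivated scaling factor (not the paper's exact ESF), a NumPy sketch that rescales attention logits by log(n)/log(n_train), so attention over a larger test instance stays closer to the weight patterns, and entropies, seen at the training size.
<pre><code>
import numpy as np

def attention(scores, n_train=100):
    """Softmax over logits scaled by log(n) / log(n_train): larger instances get
    sharper logits so the attention entropy resembles that seen during training.
    An illustrative entropy-based scaling, not the paper's exact ESF."""
    n = scores.shape[-1]
    esf = np.log(n) / np.log(n_train)
    z = scores * esf
    z = z - z.max(axis=-1, keepdims=True)   # numerical stability
    w = np.exp(z)
    return w / w.sum(axis=-1, keepdims=True)

rng = np.random.default_rng(0)
scores = rng.normal(size=500)               # logits over a larger instance (n = 500)
entropy = lambda p: -(p * np.log(p)).sum()
# Scaled attention (trained at n=100) is sharper than the unscaled softmax (n_train=n):
print(entropy(attention(scores)), entropy(attention(scores, n_train=500)))
</code></pre>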
</p> </div> </dd> <dt> <a name='item184'>[184]</a> <a href ="/abs/2407.07066" title="Abstract" id="2407.07066"> arXiv:2407.07066 </a> (replaced) [<a href="/pdf/2407.07066" title="Download PDF" id="pdf-2407.07066" aria-labelledby="pdf-2407.07066">pdf</a>, <a href="https://arxiv.org/html/2407.07066v4" title="View HTML" id="html-2407.07066" aria-labelledby="html-2407.07066" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.07066" title="Other formats" id="oth-2407.07066" aria-labelledby="oth-2407.07066">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Explainable Differential Privacy-Hyperdimensional Computing for Balancing Privacy and Transparency in Additive Manufacturing Monitoring </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Piran,+F+J">Fardin Jalil Piran</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Poduval,+P+P">Prathyush P. Poduval</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barkam,+H+E">Hamza Errahmouni Barkam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Imani,+M">Mohsen Imani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Imani,+F">Farhad Imani</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 30 pages, 14 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Cryptography and Security (cs.CR); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Machine Learning (ML) models integrated with in-situ sensing offer transformative solutions for defect detection in Additive Manufacturing (AM), but this integration brings critical challenges in safeguarding sensitive data, such as part designs and material compositions. Differential Privacy (DP), which introduces mathematically controlled noise, provides a balance between data utility and privacy. However, black-box Artificial Intelligence (AI) models often obscure how this noise impacts model accuracy, complicating the optimization of privacy-accuracy trade-offs. This study introduces the Differential Privacy-Hyperdimensional Computing (DP-HD) framework, a novel approach combining Explainable AI (XAI) and vector symbolic paradigms to quantify and predict noise effects on accuracy using a Signal-to-Noise Ratio (SNR) metric. DP-HD enables precise tuning of DP noise levels, ensuring an optimal balance between privacy and performance. The framework has been validated using real-world AM data, demonstrating its applicability to industrial environments. Experimental results demonstrate DP-HD's capability to achieve state-of-the-art accuracy (94.43%) with robust privacy protections in anomaly detection for AM, even under significant noise conditions. Beyond AM, DP-HD holds substantial promise for broader applications in privacy-sensitive domains such as healthcare, financial services, and government data management, where securing sensitive data while maintaining high ML performance is paramount. 
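A minimal NumPy sketch of the underlying quantities: Gaussian-mechanism noise calibrated with the standard (epsilon, delta) formula, and the resulting signal-to-noise ratio of the kind DP-HD uses to predict accuracy impact. The hypervector stand-in and unit sensitivity are illustrative assumptions.
<pre><code>
import numpy as np

def gaussian_sigma(sensitivity, epsilon, delta):
    """Standard Gaussian-mechanism calibration:
    sigma = sensitivity * sqrt(2 ln(1.25 / delta)) / epsilon."""
    return sensitivity * np.sqrt(2 * np.log(1.25 / delta)) / epsilon

rng = np.random.default_rng(0)
x = rng.normal(size=10_000)                  # stand-in for a hyperdimensional encoding
for eps in (0.5, 1.0, 4.0):
    sigma = gaussian_sigma(sensitivity=1.0, epsilon=eps, delta=1e-5)
    noisy = x + rng.normal(scale=sigma, size=x.size)   # privatized encoding
    snr = x.std() / sigma                    # signal-to-noise ratio: stronger privacy, lower SNR
    print(f"eps={eps}: sigma={sigma:.2f}, SNR={snr:.2f}")
</code></pre>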
</p> </div> </dd> <dt> <a name='item185'>[185]</a> <a href ="/abs/2407.07357" title="Abstract" id="2407.07357"> arXiv:2407.07357 </a> (replaced) [<a href="/pdf/2407.07357" title="Download PDF" id="pdf-2407.07357" aria-labelledby="pdf-2407.07357">pdf</a>, <a href="https://arxiv.org/html/2407.07357v2" title="View HTML" id="html-2407.07357" aria-labelledby="html-2407.07357" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.07357" title="Other formats" id="oth-2407.07357" aria-labelledby="oth-2407.07357">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A deep graph model for the signed interaction prediction in biological network </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+S">Shuyi Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+M">Mengji Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+M">Meijie Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+L">Lun Yu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Molecular Networks (q-bio.MN) </div> <p class='mathjax'> Predicting signed interactions in biological networks is crucial for understanding drug mechanisms and facilitating drug repurposing. While deep graph models have demonstrated success in modeling complex biological systems, existing approaches often fail to distinguish between positive and negative interactions, limiting their utility for precise pharmacological predictions. In this study, we propose a novel deep graph model, \textbf{RGCNTD} (Relational Graph Convolutional Network with Tensor Decomposition), designed to predict both polar (e.g., activation, inhibition) and non-polar (e.g., binding, affect) chemical-gene interactions. Our model integrates graph convolutional networks with tensor decomposition to enhance feature representation and incorporates a conflict-aware sampling strategy to resolve polarity ambiguities. We introduce new evaluation metrics, \textit{AUC\textsubscript{polarity}} and \textit{CP@500}, to assess the model's ability to differentiate interaction types. Experimental results demonstrate that \textbf{RGCNTD} outperforms baseline models, achieving superior classification accuracy and improved discrimination of polar edges. Furthermore, we analyze the impact of subgraph components on predictive performance, revealing that additional network structures do not always enhance accuracy. These findings highlight the importance of polarity-aware modeling in drug discovery and network pharmacology, providing a robust framework for predicting complex biological interactions. 
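For intuition, a minimal NumPy sketch of factorized, relation-specific edge scoring in the DistMult style, where separate relation factors keep polar interaction types apart; this is an illustrative stand-in, not RGCNTD's actual architecture.
<pre><code>
import numpy as np

rng = np.random.default_rng(0)
n_entities, n_relations, dim = 100, 4, 16    # chemicals/genes; relation types incl. polarity

# DistMult-style factorized scoring, a simple stand-in for tensor-decomposition
# scoring on top of relational graph convolutions (embeddings here are random).
E = rng.normal(scale=0.1, size=(n_entities, dim))    # entity embeddings (from an R-GCN)
R = rng.normal(scale=0.1, size=(n_relations, dim))   # one diagonal factor per relation

def score(h, r, t):
    """Plausibility of edge (h, r, t); distinct relations for activation vs. inhibition
    let the model keep polar interaction types apart."""
    return float(E[h] @ (R[r] * E[t]))

ACTIVATES, INHIBITS = 0, 1
print(score(3, ACTIVATES, 7), score(3, INHIBITS, 7))  # independent scores per polarity
</code></pre>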
</p> </div> </dd> <dt> <a name='item186'>[186]</a> <a href ="/abs/2407.13279" title="Abstract" id="2407.13279"> arXiv:2407.13279 </a> (replaced) [<a href="/pdf/2407.13279" title="Download PDF" id="pdf-2407.13279" aria-labelledby="pdf-2407.13279">pdf</a>, <a href="https://arxiv.org/html/2407.13279v2" title="View HTML" id="html-2407.13279" aria-labelledby="html-2407.13279" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.13279" title="Other formats" id="oth-2407.13279" aria-labelledby="oth-2407.13279">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Analyzing and Bridging the Gap between Maximizing Total Reward and Discounted Reward in Deep Reinforcement Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+S">Shuyu Yin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+F">Fei Wen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+P">Peilin Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+T">Tao Luo</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> The optimal objective is a fundamental aspect of reinforcement learning (RL), as it determines how policies are evaluated and optimized. While total return maximization is the ideal objective in RL, discounted return maximization is the practical objective due to its stability. This can lead to a misalignment of objectives. To better understand the problem, we theoretically analyze the performance gap between the policy that maximizes the total return and the policy that maximizes the discounted return. Our analysis reveals that increasing the discount factor can be ineffective at eliminating this gap when the environment contains cyclic states, a frequent scenario. To address this issue, we propose two alternative approaches to align the objectives. The first approach achieves alignment by modifying the terminal state value, treating it as a tunable hyper-parameter with its suitable range defined through theoretical analysis. The second approach focuses on calibrating the reward data in trajectories, enabling alignment in practical Deep RL applications using off-policy algorithms. This method enhances robustness to the discount factor and improves performance when the trajectory length is large. Our proposed methods demonstrate that adjusting reward data can achieve alignment, providing an insight that can be leveraged to design new optimization objectives to fundamentally enhance the performance of RL algorithms.
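A small worked example of the misalignment and of the first proposed fix: with discounting, an immediate reward can outrank a larger delayed one, and an adjusted terminal state value restores the total-return ordering. The particular bonus value below is illustrative, not the paper's tuned range.
<pre><code>
import numpy as np

def discounted(rewards, gamma, terminal_value=0.0):
    """Discounted return of a reward sequence, with a terminal state value appended."""
    rs = list(rewards) + [terminal_value]
    return sum(r * gamma**t for t, r in enumerate(rs))

slow = [0, 0, 0, 10]   # delayed reward, higher total return
fast = [6]             # immediate reward, lower total return
gamma = 0.8

print(sum(slow) > sum(fast))                              # True: total return prefers `slow`
print(discounted(slow, gamma) > discounted(fast, gamma))  # False: discounting flips the ranking

# Aligning the objectives by adjusting the terminal state value: a negative
# terminal value penalizes terminating early, restoring the total-return order.
print(discounted(slow, gamma, terminal_value=-5.0) >
      discounted(fast, gamma, terminal_value=-5.0))       # True again
</code></pre>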
</p> </div> </dd> <dt> <a name='item187'>[187]</a> <a href ="/abs/2407.17226" title="Abstract" id="2407.17226"> arXiv:2407.17226 </a> (replaced) [<a href="/pdf/2407.17226" title="Download PDF" id="pdf-2407.17226" aria-labelledby="pdf-2407.17226">pdf</a>, <a href="https://arxiv.org/html/2407.17226v3" title="View HTML" id="html-2407.17226" aria-labelledby="html-2407.17226" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.17226" title="Other formats" id="oth-2407.17226" aria-labelledby="oth-2407.17226">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sublinear Regret for a Class of Continuous-Time Linear-Quadratic Reinforcement Learning Problems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+Y">Yilie Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jia,+Y">Yanwei Jia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+X+Y">Xun Yu Zhou</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 49 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Systems and Control (eess.SY); Optimization and Control (math.OC) </div> <p class='mathjax'> We study reinforcement learning (RL) for a class of continuous-time linear-quadratic (LQ) control problems for diffusions, where states are scalar-valued and running control rewards are absent but volatilities of the state processes depend on both state and control variables. We apply a model-free approach that relies neither on knowledge of model parameters nor on their estimations, and devise an RL algorithm to learn the optimal policy parameter directly. Our main contributions include the introduction of an exploration schedule and a regret analysis of the proposed algorithm. We provide the convergence rate of the policy parameter to the optimal one, and prove that the algorithm achieves a regret bound of $O(N^{\frac{3}{4}})$ up to a logarithmic factor, where $N$ is the number of learning episodes. We conduct a simulation study to validate the theoretical results and demonstrate the effectiveness and reliability of the proposed algorithm. We also perform numerical comparisons between our method and those of the recent model-based stochastic LQ RL studies adapted to the state- and control-dependent volatility setting, demonstrating a better performance of the former in terms of regret bounds. 
</p> </div> </dd> <dt> <a name='item188'>[188]</a> <a href ="/abs/2408.12526" title="Abstract" id="2408.12526"> arXiv:2408.12526 </a> (replaced) [<a href="/pdf/2408.12526" title="Download PDF" id="pdf-2408.12526" aria-labelledby="pdf-2408.12526">pdf</a>, <a href="https://arxiv.org/html/2408.12526v3" title="View HTML" id="html-2408.12526" aria-labelledby="html-2408.12526" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.12526" title="Other formats" id="oth-2408.12526" aria-labelledby="oth-2408.12526">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Exploiting Student Parallelism for Efficient GPU Inference of BERT-like Models in Online Services </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+W">Weiyan Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+Y">Yilun Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yiming Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+V+J">Victor Junqiu Wei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tian,+H">Han Tian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+L">Li Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xue,+J">Jinbao Xue</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tao,+Y">Yangyu Tao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+D">Di Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+K">Kai Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Due to their high accuracy, BERT-like models have been widely adopted in text mining and web search. However, large BERT-like models suffer from inefficient online inference, facing the following two problems on GPUs: (1) their high accuracy relies on large model depth, which linearly increases the sequential computation on GPUs; (2) stochastic and dynamic online workloads cause extra costs from batching and padding. Therefore, we present \sys for the real-world setting of GPU inference on online workloads. At its core, \sys adopts stacking distillation and boosting ensemble, distilling the original deep model into a group of shallow but virtually stacked student models running in parallel. This enables \sys to achieve a lower model depth (e.g., two layers) than prior approaches and the lowest inference latency while maintaining accuracy. In addition, adaptive student pruning realizes dynamic student numbers according to changing online workloads. Especially for occasional workload bursts, it can temporarily decrease the student number with minimal accuracy loss to improve system throughput. We conduct comprehensive experiments to verify its effectiveness; the results show that \sys outperforms the baselines by $1.6\times$ to $4.1\times$ in latency while maintaining accuracy and achieves up to $22.27\times$ higher throughput for workload bursts. 
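</p> <p class='mathjax'> The ensemble idea can be sketched as follows (a hypothetical PyTorch stand-in; the system's distillation procedure and serving stack are not shown): </p> <pre>
import torch
import torch.nn as nn

# Shallow "student" encoders combined additively, boosting-style.
# Depth stays at two layers no matter how many students run in parallel.

class ShallowStudent(nn.Module):
    def __init__(self, dim=256):
        super().__init__()
        self.net = nn.Sequential(nn.Linear(dim, dim), nn.ReLU(), nn.Linear(dim, dim))

    def forward(self, x):
        return self.net(x)

class StudentEnsemble(nn.Module):
    def __init__(self, n_students=4, dim=256):
        super().__init__()
        self.students = nn.ModuleList(ShallowStudent(dim) for _ in range(n_students))

    def forward(self, x, n_active=None):
        # Adaptive student pruning: during a workload burst, temporarily
        # run fewer students to trade a little accuracy for throughput.
        active = self.students[: (n_active or len(self.students))]
        return sum(s(x) for s in active)

x = torch.randn(8, 256)
out_full = StudentEnsemble()(x)                 # all students active
out_burst = StudentEnsemble()(x, n_active=2)    # pruned under a burst
</pre> <p class='mathjax'>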
</p> </div> </dd> <dt> <a name='item189'>[189]</a> <a href ="/abs/2409.13936" title="Abstract" id="2409.13936"> arXiv:2409.13936 </a> (replaced) [<a href="/pdf/2409.13936" title="Download PDF" id="pdf-2409.13936" aria-labelledby="pdf-2409.13936">pdf</a>, <a href="https://arxiv.org/html/2409.13936v2" title="View HTML" id="html-2409.13936" aria-labelledby="html-2409.13936" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.13936" title="Other formats" id="oth-2409.13936" aria-labelledby="oth-2409.13936">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> High-Resolution Flood Probability Mapping Using Generative Machine Learning with Large-Scale Synthetic Precipitation and Inundation Data </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+L">Lipai Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Antolini,+F">Federico Antolini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mostafavi,+A">Ali Mostafavi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Blessing,+R">Russell Blessing</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Garcia,+M">Matthew Garcia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Brody,+S+D">Samuel D. Brody</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> High-resolution flood probability maps are instrumental for assessing flood risk but are often limited by the availability of historical data. Additionally, producing the simulated data needed for probabilistic flood maps with physics-based models involves significant computation and time, which inhibits feasibility. To address this gap, this study introduces the Precipitation-Flood Depth Generative Pipeline, a novel methodology that leverages generative machine learning to generate the large-scale synthetic inundation data needed to produce probabilistic flood maps. With a focus on Harris County, Texas, the pipeline begins by training a cell-wise depth estimator on a set of precipitation-flood events modeled with a physics-based model. This cell-wise depth estimator, which emphasizes precipitation-based features, outperforms universal models. Subsequently, a Conditional Tabular Generative Adversarial Network (CTGAN) is used to conditionally generate synthetic precipitation point clouds, which are filtered using strategic thresholds to align with realistic precipitation patterns. A precipitation feature pool is then constructed for each cell, enabling strategic sampling and the generation of synthetic precipitation events. After generating 10,000 synthetic events, flood probability maps are created for various inundation depths. Validation using similarity and correlation metrics confirms the accuracy of the synthetic depth distributions. The Precipitation-Flood Depth Generative Pipeline provides a scalable solution for generating the synthetic flood depth data needed for high-resolution flood probability maps, which can enhance flood mitigation planning. 
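</p> <p class='mathjax'> The synthetic-precipitation step can be sketched with the ctgan package (a minimal sketch; the column names, thresholds, and toy training data are our placeholders): </p> <pre>
import numpy as np
import pandas as pd
from ctgan import CTGAN

# Fit a conditional tabular GAN on observed precipitation features,
# sample candidate events, then keep only plausible ones before
# building the per-cell feature pool.

rng = np.random.default_rng(0)
train = pd.DataFrame({
    "total_mm": rng.gamma(2.0, 40.0, size=500),        # placeholder feature
    "peak_intensity": rng.gamma(2.0, 10.0, size=500),  # placeholder feature
})

model = CTGAN(epochs=10)
model.fit(train)
synthetic = model.sample(10_000)

# Strategic thresholds filter out implausible samples.
plausible = synthetic[(synthetic.total_mm > 0) & (synthetic.peak_intensity <= 150)]
</pre> <p class='mathjax'>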
</p> </div> </dd> <dt> <a name='item190'>[190]</a> <a href ="/abs/2409.19606" title="Abstract" id="2409.19606"> arXiv:2409.19606 </a> (replaced) [<a href="/pdf/2409.19606" title="Download PDF" id="pdf-2409.19606" aria-labelledby="pdf-2409.19606">pdf</a>, <a href="https://arxiv.org/html/2409.19606v3" title="View HTML" id="html-2409.19606" aria-labelledby="html-2409.19606" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.19606" title="Other formats" id="oth-2409.19606" aria-labelledby="oth-2409.19606">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Hyper-Connections </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+D">Defa Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+H">Hongzhi Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+Z">Zihao Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+Y">Yutao Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mao,+Y">Yunyao Mao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+B">Banggu Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Min,+Q">Qiyang Min</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+X">Xun Zhou</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computation and Language (cs.CL); Computer Vision and Pattern Recognition (cs.CV); Neural and Evolutionary Computing (cs.NE) </div> <p class='mathjax'> We present hyper-connections, a simple yet effective method that can serve as an alternative to residual connections. This approach specifically addresses common drawbacks observed in residual connection variants, such as the seesaw effect between gradient vanishing and representation collapse. Theoretically, hyper-connections allow the network to adjust the strength of connections between features at different depths and dynamically rearrange layers. We conduct experiments focusing on the pre-training of large language models, including dense and sparse models, where hyper-connections show significant performance improvements over residual connections. Additional experiments conducted on vision tasks also demonstrate similar improvements. We anticipate that this method will be broadly applicable and beneficial across a wide range of AI problems. 
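</p> <p class='mathjax'> The abstract does not spell out the formulation, so the following is a deliberately simplified, speculative sketch of the core idea of learnable connection strengths, not the paper's architecture: </p> <pre>
import torch
import torch.nn as nn

# A fixed residual computes y = x + f(x). Making both path strengths
# learnable lets the network tune how strongly each depth's features
# feed forward, the kind of flexibility hyper-connections generalize.

class LearnableConnection(nn.Module):
    def __init__(self, block):
        super().__init__()
        self.block = block
        self.alpha = nn.Parameter(torch.ones(1))  # skip-path strength
        self.beta = nn.Parameter(torch.ones(1))   # residual-branch strength

    def forward(self, x):
        return self.alpha * x + self.beta * self.block(x)

layer = LearnableConnection(nn.Linear(64, 64))
y = layer(torch.randn(4, 64))
</pre> <p class='mathjax'>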
</p> </div> </dd> <dt> <a name='item191'>[191]</a> <a href ="/abs/2410.01706" title="Abstract" id="2410.01706"> arXiv:2410.01706 </a> (replaced) [<a href="/pdf/2410.01706" title="Download PDF" id="pdf-2410.01706" aria-labelledby="pdf-2410.01706">pdf</a>, <a href="https://arxiv.org/html/2410.01706v4" title="View HTML" id="html-2410.01706" aria-labelledby="html-2410.01706" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.01706" title="Other formats" id="oth-2410.01706" aria-labelledby="oth-2410.01706">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sable: a Performant, Efficient and Scalable Sequence Model for MARL </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Mahjoub,+O">Omayma Mahjoub</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Abramowitz,+S">Sasha Abramowitz</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=de+Kock,+R">Ruan de Kock</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Khlifi,+W">Wiem Khlifi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Toit,+S+d">Simon du Toit</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Daniel,+J">Jemma Daniel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nessir,+L+B">Louay Ben Nessir</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Beyers,+L">Louise Beyers</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Formanek,+C">Claude Formanek</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Clark,+L">Liam Clark</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pretorius,+A">Arnu Pretorius</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Multiagent Systems (cs.MA) </div> <p class='mathjax'> As multi-agent reinforcement learning (MARL) progresses towards solving larger and more complex problems, it becomes increasingly important that algorithms exhibit the key properties of (1) strong performance, (2) memory efficiency and (3) scalability. In this work, we introduce Sable, a performant, memory efficient and scalable sequence modeling approach to MARL. Sable works by adapting the retention mechanism in Retentive Networks (Sun et al., 2023) to achieve computationally efficient processing of multi-agent observations with long context memory for temporal reasoning. Through extensive evaluations across six diverse environments, we demonstrate how Sable is able to significantly outperform existing state-of-the-art methods in a large number of diverse tasks (34 out of 45 tested). Furthermore, Sable maintains performance as we scale the number of agents, handling environments with more than a thousand agents while exhibiting a linear increase in memory usage. Finally, we conduct ablation studies to isolate the source of Sable's performance gains and confirm its efficient computational memory usage. 
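</p> <p class='mathjax'> For reference, the retention recurrence from Retentive Networks that Sable adapts can be written in a few lines (recurrent form only; the chunked computation and Sable's multi-agent adaptation are omitted): </p> <pre>
import numpy as np

# Retention (Sun et al., 2023), recurrent form: a decayed running state
# S_t = gamma * S_{t-1} + k_t^T v_t gives linear-time sequence
# processing while retaining long-context memory.

def retention(q, k, v, gamma=0.9):
    T, d = q.shape
    S = np.zeros((d, v.shape[1]))
    out = np.zeros((T, v.shape[1]))
    for t in range(T):
        S = gamma * S + np.outer(k[t], v[t])  # decay old context, add new
        out[t] = q[t] @ S
    return out

rng = np.random.default_rng(0)
out = retention(rng.normal(size=(6, 4)), rng.normal(size=(6, 4)),
                rng.normal(size=(6, 4)))
</pre> <p class='mathjax'>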
</p> </div> </dd> <dt> <a name='item192'>[192]</a> <a href ="/abs/2410.07933" title="Abstract" id="2410.07933"> arXiv:2410.07933 </a> (replaced) [<a href="/pdf/2410.07933" title="Download PDF" id="pdf-2410.07933" aria-labelledby="pdf-2410.07933">pdf</a>, <a href="/format/2410.07933" title="Other formats" id="oth-2410.07933" aria-labelledby="oth-2410.07933">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Offline Hierarchical Reinforcement Learning via Inverse Optimization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Schmidt,+C">Carolin Schmidt</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gammelli,+D">Daniele Gammelli</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Harrison,+J">James Harrison</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pavone,+M">Marco Pavone</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rodrigues,+F">Filipe Rodrigues</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Systems and Control (eess.SY); Optimization and Control (math.OC) </div> <p class='mathjax'> Hierarchical policies enable strong performance in many sequential decision-making problems, such as those with high-dimensional action spaces, those requiring long-horizon planning, and settings with sparse rewards. However, learning hierarchical policies from static offline datasets presents a significant challenge. Crucially, actions taken by higher-level policies may not be directly observable within hierarchical controllers, and the offline dataset might have been generated using a different policy structure, hindering the use of standard offline learning algorithms. In this work, we propose OHIO: a framework for offline reinforcement learning (RL) of hierarchical policies. Our framework leverages knowledge of the policy structure to solve the \textit{inverse problem}, recovering the unobservable high-level actions that likely generated the observed data under our hierarchical policy. This approach constructs a dataset suitable for off-the-shelf offline training. We demonstrate our framework on robotic and network optimization problems and show that it substantially outperforms end-to-end RL methods and improves robustness. We investigate a variety of instantiations of our framework, both in direct deployment of policies trained offline and when online fine-tuning is performed. 
Code and data are available at <a href="https://ohio-offline-hierarchical-rl.github.io" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item193'>[193]</a> <a href ="/abs/2410.09408" title="Abstract" id="2410.09408"> arXiv:2410.09408 </a> (replaced) [<a href="/pdf/2410.09408" title="Download PDF" id="pdf-2410.09408" aria-labelledby="pdf-2410.09408">pdf</a>, <a href="/format/2410.09408" title="Other formats" id="oth-2410.09408" aria-labelledby="oth-2410.09408">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> C-Adapter: Adapting Deep Classifiers for Efficient Conformal Prediction Sets </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+K">Kangdao Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+H">Hao Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+J">Jianguo Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhuang,+H">Huiping Zhuang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vong,+C">Chi-Man Vong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+H">Hongxin Wei</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> The experimental results are not sufficient </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Conformal prediction, as an emerging uncertainty quantification technique, typically functions as post-hoc processing for the outputs of trained classifiers. To optimize the classifier for maximum predictive efficiency, Conformal Training rectifies the training objective with a regularization that minimizes the average prediction set size at a specific error rate. However, the regularization term inevitably deteriorates the classification accuracy and leads to suboptimal efficiency of conformal predictors. To address this issue, we introduce \textbf{Conformal Adapter} (C-Adapter), an adapter-based tuning method to enhance the efficiency of conformal predictors without sacrificing accuracy. In particular, we implement the adapter as a class of intra order-preserving functions and tune it with our proposed loss that maximizes the discriminability of non-conformity scores between correctly and randomly matched data-label pairs. Using C-Adapter, the model tends to produce extremely high non-conformity scores for incorrect labels, thereby enhancing the efficiency of prediction sets across different coverage rates. Extensive experiments demonstrate that C-Adapter can effectively adapt various classifiers for efficient prediction sets, as well as enhance the conformal training method. 
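</p> <p class='mathjax'> For context, the split conformal procedure that C-Adapter's tuned scores feed into can be sketched as follows (a standard construction with the one-minus-softmax-probability score; the adapter itself is not reproduced): </p> <pre>
import numpy as np

# Split conformal prediction: calibrate a quantile threshold on
# non-conformity scores, then include every label scoring below it.

def conformal_sets(cal_probs, cal_labels, test_probs, alpha=0.1):
    n = len(cal_labels)
    cal_scores = 1.0 - cal_probs[np.arange(n), cal_labels]
    # Finite-sample-corrected quantile at level 1 - alpha.
    q = np.quantile(cal_scores, np.ceil((n + 1) * (1 - alpha)) / n)
    return [np.where(1.0 - p <= q)[0] for p in test_probs]

rng = np.random.default_rng(0)
cal_probs = rng.dirichlet(np.ones(5), size=100)    # placeholder classifier outputs
cal_labels = rng.integers(0, 5, size=100)
test_probs = rng.dirichlet(np.ones(5), size=3)
print(conformal_sets(cal_probs, cal_labels, test_probs))
# Better-separated non-conformity scores (C-Adapter's goal) shrink these sets.
</pre> <p class='mathjax'>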
</p> </div> </dd> <dt> <a name='item194'>[194]</a> <a href ="/abs/2410.12360" title="Abstract" id="2410.12360"> arXiv:2410.12360 </a> (replaced) [<a href="/pdf/2410.12360" title="Download PDF" id="pdf-2410.12360" aria-labelledby="pdf-2410.12360">pdf</a>, <a href="https://arxiv.org/html/2410.12360v3" title="View HTML" id="html-2410.12360" aria-labelledby="html-2410.12360" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.12360" title="Other formats" id="oth-2410.12360" aria-labelledby="oth-2410.12360">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Neural Scaling Laws for Time Series Foundation Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yao,+Q">Qingren Yao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+C+H">Chao-Han Huck Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+R">Renhe Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+Y">Yuxuan Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+M">Ming Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+S">Shirui Pan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by the 13th International Conference on Learning Representations (ICLR 2025) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Scaling laws offer valuable insights into the design of time series foundation models (TSFMs). However, previous research has largely focused on the scaling laws of TSFMs for in-distribution (ID) data, leaving their out-of-distribution (OOD) scaling behavior and the influence of model architectures less explored. In this work, we examine two common TSFM architectures, encoder-only and decoder-only Transformers, and investigate their scaling behavior on both ID and OOD data. These models are trained and evaluated across varying parameter counts, compute budgets, and dataset sizes. Our experiments reveal that the log-likelihood loss of TSFMs exhibits similar scaling behavior in both OOD and ID settings. We further compare the scaling properties across different architectures, incorporating two state-of-the-art TSFMs as case studies, showing that model architecture plays a significant role in scaling. The encoder-only Transformers demonstrate better scalability than the decoder-only Transformers, while the architectural enhancements in the two advanced TSFMs primarily improve ID performance but reduce OOD scalability. While scaling up TSFMs is expected to drive performance breakthroughs, the lack of a comprehensive understanding of TSFM scaling laws has hindered the development of a robust framework to guide model scaling. We fill this gap in this work by synthesizing our findings and providing practical guidelines for designing and scaling larger TSFMs with enhanced model capabilities. 
</p> </div> </dd> <dt> <a name='item195'>[195]</a> <a href ="/abs/2410.12730" title="Abstract" id="2410.12730"> arXiv:2410.12730 </a> (replaced) [<a href="/pdf/2410.12730" title="Download PDF" id="pdf-2410.12730" aria-labelledby="pdf-2410.12730">pdf</a>, <a href="https://arxiv.org/html/2410.12730v3" title="View HTML" id="html-2410.12730" aria-labelledby="html-2410.12730" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.12730" title="Other formats" id="oth-2410.12730" aria-labelledby="oth-2410.12730">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Counterfactual Generative Modeling with Variational Causal Inference </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Y">Yulun Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=McConnell,+L">Louie McConnell</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Iriondo,+C">Claudia Iriondo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published as a conference paper at ICLR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Statistics Theory (math.ST); Machine Learning (stat.ML) </div> <p class='mathjax'> Estimating an individual's counterfactual outcomes under interventions is a challenging task for traditional causal inference and supervised learning approaches when the outcome is high-dimensional (e.g. gene expressions, facial images) and covariates are relatively limited. In this case, to predict one's outcomes under counterfactual treatments, it is crucial to leverage individual information contained in the observed outcome in addition to the covariates. Prior works using variational inference in counterfactual generative modeling have been focusing on neural adaptations and model variants within the conditional variational autoencoder formulation, which we argue is fundamentally ill-suited to the notion of counterfactual in causal inference. In this work, we present a novel variational Bayesian causal inference framework and its theoretical backings to properly handle counterfactual generative modeling tasks, through which we are able to conduct counterfactual supervision end-to-end during training without any counterfactual samples, and encourage disentangled exogenous noise abduction that aids the correct identification of causal effect in counterfactual generations. In experiments, we demonstrate the advantage of our framework compared to state-of-the-art models in counterfactual generative modeling on multiple benchmarks. </p> </div> </dd> <dt> <a name='item196'>[196]</a> <a href ="/abs/2410.16713" title="Abstract" id="2410.16713"> arXiv:2410.16713 </a> (replaced) [<a href="/pdf/2410.16713" title="Download PDF" id="pdf-2410.16713" aria-labelledby="pdf-2410.16713">pdf</a>, <a href="https://arxiv.org/html/2410.16713v4" title="View HTML" id="html-2410.16713" aria-labelledby="html-2410.16713" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.16713" title="Other formats" id="oth-2410.16713" aria-labelledby="oth-2410.16713">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Collapse or Thrive? 
Perils and Promises of Synthetic Data in a Self-Generating World </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kazdan,+J">Joshua Kazdan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schaeffer,+R">Rylan Schaeffer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dey,+A">Apratim Dey</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gerstgrasser,+M">Matthias Gerstgrasser</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rafailov,+R">Rafael Rafailov</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Donoho,+D+L">David L. Donoho</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Koyejo,+S">Sanmi Koyejo</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at NeurIPS 2024 Workshops: Mathematics of Modern Machine Learning (M3L) and Attributing Model Behavior at Scale (ATTRIB) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> What happens when generative machine learning models are pretrained on web-scale datasets containing data generated by earlier models? Some prior work warns of "model collapse" as the web is overwhelmed by synthetic data; other work suggests the problem can be contained (i.e. collapse can be avoided) by managing how available data are used in pretraining. In this paper, we report experiments on three ways of using data (training-workflows), across three generative model task-settings (multivariate Gaussian estimation, kernel density estimation, and language-model fine-tuning) to further confirm the possibility of containment: (a) we confirm that the training-workflow of {\it replacing} all real data by successive generations of purely synthetic data indeed suffers model collapse in all task-settings studied; (b) we consider the training-workflow of {\it accumulating} synthetic data alongside real data and training on all data combined, and we confirm that, although the proportion of real data eventually becomes zero, models remain stable and their test losses do not diverge under this training-workflow; (c) we consider a training-workflow where real and synthetic data accumulate together but each successive generation of pretraining is constrained to use a fixed-size data subset. In this workflow, we observe slow and gradual rather than explosive degradation of test loss performance across generations. Our insights are particularly important when forecasting whether future frontier generative models will collapse or thrive, and our results open avenues for empirically and mathematically studying the context-dependent value of synthetic data. 
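</p> <p class='mathjax'> Workflows (a) and (b) are easy to state concretely; the sketch below (ours, on a univariate analogue of the Gaussian task-setting) contrasts replacing with accumulating: </p> <pre>
import numpy as np

# (a) "replace": each generation fits only the previous generation's
#     synthetic samples. (b) "accumulate": each generation fits real
#     data plus all synthetic data generated so far.

rng = np.random.default_rng(0)
real = rng.normal(0.0, 1.0, size=1000)

def fit_and_sample(data, n, rng):
    mu, sigma = data.mean(), data.std()
    return sigma, rng.normal(mu, sigma, size=n)

replace_data, accum_data = real, real
for gen in range(10):
    s_rep, synth_rep = fit_and_sample(replace_data, 1000, rng)
    replace_data = synth_rep                              # (a) discard the rest
    s_acc, synth_acc = fit_and_sample(accum_data, 1000, rng)
    accum_data = np.concatenate([accum_data, synth_acc])  # (b) keep everything
    print(gen, round(s_rep, 3), round(s_acc, 3))
# Under (a) the fitted scale tends to drift across generations (collapse);
# under (b) it stays near 1, matching the stability reported above.
</pre> <p class='mathjax'>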
</p> </div> </dd> <dt> <a name='item197'>[197]</a> <a href ="/abs/2410.17263" title="Abstract" id="2410.17263"> arXiv:2410.17263 </a> (replaced) [<a href="/pdf/2410.17263" title="Download PDF" id="pdf-2410.17263" aria-labelledby="pdf-2410.17263">pdf</a>, <a href="/format/2410.17263" title="Other formats" id="oth-2410.17263" aria-labelledby="oth-2410.17263">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> An Effective Theory of Bias Amplification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Subramonian,+A">Arjun Subramonian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bell,+S+J">Samuel J. Bell</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sagun,+L">Levent Sagun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dohmatob,+E">Elvis Dohmatob</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to ICLR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computers and Society (cs.CY); Machine Learning (stat.ML) </div> <p class='mathjax'> Machine learning models can capture and amplify biases present in data, leading to disparate test performance across social groups. To better understand, evaluate, and mitigate these biases, a deeper theoretical understanding of how model design choices and data distribution properties contribute to bias is needed. In this work, we contribute a precise analytical theory in the context of ridge regression, both with and without random projections, where the former models feedforward neural networks in a simplified regime. Our theory offers a unified and rigorous explanation of machine learning bias, providing insights into phenomena such as bias amplification and minority-group bias in various feature and parameter regimes. For example, we observe that there may be an optimal regularization penalty or training time to avoid bias amplification, and there can be differences in test error between groups that are not alleviated with increased parameterization. Importantly, our theoretical predictions align with empirical observations reported in the literature on machine learning bias. We extensively empirically validate our theory on synthetic and semi-synthetic datasets. </p> </div> </dd> <dt> <a name='item198'>[198]</a> <a href ="/abs/2411.12537" title="Abstract" id="2411.12537"> arXiv:2411.12537 </a> (replaced) [<a href="/pdf/2411.12537" title="Download PDF" id="pdf-2411.12537" aria-labelledby="pdf-2411.12537">pdf</a>, <a href="https://arxiv.org/html/2411.12537v5" title="View HTML" id="html-2411.12537" aria-labelledby="html-2411.12537" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.12537" title="Other formats" id="oth-2411.12537" aria-labelledby="oth-2411.12537">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unlocking State-Tracking in Linear RNNs Through Negative Eigenvalues </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Grazzi,+R">Riccardo Grazzi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Siems,+J">Julien Siems</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zela,+A">Arber Zela</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Franke,+J+K">Jörg K.H. 
Franke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hutter,+F">Frank Hutter</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pontil,+M">Massimiliano Pontil</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> V2: Correction to Theorem 1 and 2 and to point 3 of Proposition 1. V3: ICLR Camera Ready, V4: ICLR Camera Ready, added figures to theory section, updated modular arithmetic with brackets results because previous results did not contain multiplication </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Computation and Language (cs.CL); Formal Languages and Automata Theory (cs.FL) </div> <p class='mathjax'> Linear Recurrent Neural Networks (LRNNs) such as Mamba, RWKV, GLA, mLSTM, and DeltaNet have emerged as efficient alternatives to Transformers for long sequences. However, both Transformers and LRNNs struggle to perform state-tracking, which may impair performance in tasks such as code evaluation. In one forward pass, current architectures are unable to solve even parity, the simplest state-tracking task, which non-linear RNNs can handle effectively. Recently, Sarrof et al. (2024) demonstrated that the failure of LRNNs like Mamba to solve parity stems from restricting the value range of their diagonal state-transition matrices to $[0, 1]$ and that incorporating negative values can resolve this issue. We extend this result to non-diagonal LRNNs such as DeltaNet. We prove that finite precision LRNNs with state-transition matrices having only positive eigenvalues cannot solve parity, while non-triangular matrices are needed to count modulo $3$. Notably, we also prove that LRNNs can learn any regular language when their state-transition matrices are products of identity minus vector outer product matrices, each with eigenvalues in the range $[-1, 1]$. Our experiments confirm that extending the eigenvalue range of Mamba and DeltaNet to include negative values not only enables them to solve parity but consistently improves their performance on state-tracking tasks. We also show that state-tracking enabled LRNNs can be pretrained stably and efficiently at scale (1.3B parameters), achieving competitive performance on language modeling and showing promise on code and math tasks. 
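</p> <p class='mathjax'> The parity claim admits a one-line illustration (a minimal sketch, not the paper's experiments): a scalar linear recurrence whose state transition is $-1$ on input one tracks parity exactly, which is impossible when transitions are confined to $[0, 1]$: </p> <pre>
# h_t = a(x_t) * h_{t-1} with a(1) = -1 and a(0) = +1 keeps
# h_t = (-1)^(number of ones seen so far), i.e. it tracks parity.
# With transitions restricted to [0, 1], no finite-precision linear
# recurrence can flip state like this.

def parity_via_negative_eigenvalue(bits):
    h = 1.0
    for x in bits:
        h *= -1.0 if x == 1 else 1.0
    return 0 if h > 0 else 1

assert parity_via_negative_eigenvalue([1, 0, 1, 1]) == 1
assert parity_via_negative_eigenvalue([1, 1]) == 0
</pre> <p class='mathjax'>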
</p> </div> </dd> <dt> <a name='item199'>[199]</a> <a href ="/abs/2411.15292" title="Abstract" id="2411.15292"> arXiv:2411.15292 </a> (replaced) [<a href="/pdf/2411.15292" title="Download PDF" id="pdf-2411.15292" aria-labelledby="pdf-2411.15292">pdf</a>, <a href="https://arxiv.org/html/2411.15292v2" title="View HTML" id="html-2411.15292" aria-labelledby="html-2411.15292" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.15292" title="Other formats" id="oth-2411.15292" aria-labelledby="oth-2411.15292">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Influence functions and regularity tangents for efficient active learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Eaton,+F">Frederik Eaton</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 37 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Statistics Theory (math.ST); Machine Learning (stat.ML) </div> <p class='mathjax'> In this paper we describe an efficient method for providing a regression model with a sense of curiosity about its data. In the field of machine learning, our framework for representing curiosity is called Active Learning, which concerns the problem of automatically choosing data points for which to query labels in the semi-supervised setting. The methods we propose are based on computing a "regularity tangent" vector that can be calculated (with only a constant slow-down) together with the model's parameter vector during training. We then take the inner product of this tangent vector with the gradient vector of the model's loss at a given data point to obtain a measure of the influence of that point on the complexity of the model. In the simplest instantiation, there is only a single regularity tangent vector, of the same dimension as the parameter vector. Thus, in the proposed technique, once training is complete, evaluating our "curiosity" about a potential query data point can be done as quickly as calculating the model's loss gradient at that point. The new vector only doubles the amount of storage required by the model. We show that the quantity computed by our technique is an example of an "influence function", and that it measures the expected squared change in model complexity incurred by up-weighting a given data point. We propose a number of ways for using this and other related quantities to choose new training data points for a regression model. </p> </div> </dd> <dt> <a name='item200'>[200]</a> <a href ="/abs/2411.17595" title="Abstract" id="2411.17595"> arXiv:2411.17595 </a> (replaced) [<a href="/pdf/2411.17595" title="Download PDF" id="pdf-2411.17595" aria-labelledby="pdf-2411.17595">pdf</a>, <a href="https://arxiv.org/html/2411.17595v2" title="View HTML" id="html-2411.17595" aria-labelledby="html-2411.17595" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.17595" title="Other formats" id="oth-2411.17595" aria-labelledby="oth-2411.17595">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Can artificial intelligence predict clinical trial outcomes? 
</div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+S">Shuyi Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+L">Lu Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ding,+H">Hongru Ding</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+M">Meijie Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+L">Lun Yu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Applications (stat.AP) </div> <p class='mathjax'> This study evaluates the performance of large language models (LLMs) and the HINT model in predicting clinical trial outcomes, focusing on metrics including Balanced Accuracy, Matthews Correlation Coefficient (MCC), Recall, and Specificity. Results show that GPT-4o achieves superior overall performance among LLMs but, like its counterparts (GPT-3.5, GPT-4mini, Llama3), struggles with identifying negative outcomes. In contrast, HINT excels in negative sample recognition and demonstrates resilience to external factors (e.g., recruitment challenges) but underperforms in oncology trials, a major component of the dataset. LLMs exhibit strengths in early-phase trials and simpler endpoints like Overall Survival (OS), while HINT shows consistency across trial phases and excels in complex endpoints (e.g., Objective Response Rate). Trial duration analysis reveals improved model performance for medium- to long-term trials, with GPT-4o and HINT displaying stability and enhanced specificity, respectively. We underscore the complementary potential of LLMs (e.g., GPT-4o, Llama3) and HINT, advocating for hybrid approaches to leverage GPT-4o's predictive power and HINT's specificity in clinical trial outcome forecasting. </p> </div> </dd> <dt> <a name='item201'>[201]</a> <a href ="/abs/2411.18425" title="Abstract" id="2411.18425"> arXiv:2411.18425 </a> (replaced) [<a href="/pdf/2411.18425" title="Download PDF" id="pdf-2411.18425" aria-labelledby="pdf-2411.18425">pdf</a>, <a href="/format/2411.18425" title="Other formats" id="oth-2411.18425" aria-labelledby="oth-2411.18425">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Streamlining Prediction in Bayesian Deep Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+R">Rui Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Klasson,+M">Marcus Klasson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Solin,+A">Arno Solin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Trapp,+M">Martin Trapp</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> The rising interest in Bayesian deep learning (BDL) has led to a plethora of methods for estimating the posterior distribution. However, efficient computation of inferences, such as predictions, has been largely overlooked, with Monte Carlo integration remaining the standard. In this work we examine streamlining prediction in BDL through a single forward pass without sampling. For this, we use local linearisation of activation functions and local Gaussian approximations at linear layers. This allows us to analytically compute an approximation to the posterior predictive distribution. 
We showcase our approach for both MLPs and transformers, such as ViT and GPT-2, and assess its performance on regression and classification tasks. </p> </div> </dd> <dt> <a name='item202'>[202]</a> <a href ="/abs/2412.05994" title="Abstract" id="2412.05994"> arXiv:2412.05994 </a> (replaced) [<a href="/pdf/2412.05994" title="Download PDF" id="pdf-2412.05994" aria-labelledby="pdf-2412.05994">pdf</a>, <a href="https://arxiv.org/html/2412.05994v3" title="View HTML" id="html-2412.05994" aria-labelledby="html-2412.05994" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2412.05994" title="Other formats" id="oth-2412.05994" aria-labelledby="oth-2412.05994">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PIG: Physics-Informed Gaussians as Adaptive Parametric Mesh Representations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kang,+N">Namgyu Kang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Oh,+J">Jaemin Oh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hong,+Y">Youngjoon Hong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Park,+E">Eunbyung Park</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by ICLR 2025. Project page: <a href="https://namgyukang.github.io/Physics-Informed-Gaussians/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> The numerical approximation of partial differential equations (PDEs) using neural networks has seen significant advancements through Physics-Informed Neural Networks (PINNs). Despite their straightforward optimization framework and flexibility in implementing various PDEs, PINNs often suffer from limited accuracy due to the spectral bias of Multi-Layer Perceptrons (MLPs), which struggle to effectively learn high-frequency and nonlinear components. Recently, parametric mesh representations in combination with neural networks have been investigated as a promising approach to eliminate the inductive bias of MLPs. However, they usually require high-resolution grids and a large number of collocation points to achieve high accuracy while avoiding overfitting. In addition, the fixed positions of the mesh parameters restrict their flexibility, making accurate approximation of complex PDEs challenging. To overcome these limitations, we propose Physics-Informed Gaussians (PIGs), which combine feature embeddings using Gaussian functions with a lightweight neural network. Our approach uses trainable parameters for the mean and variance of each Gaussian, allowing for dynamic adjustment of their positions and shapes during training. This adaptability enables our model to optimally approximate PDE solutions, unlike models with fixed parameter positions. Furthermore, the proposed approach maintains the same optimization framework used in PINNs, allowing us to benefit from their excellent properties. Experimental results show the competitive performance of our model across various PDEs, demonstrating its potential as a robust tool for solving complex PDEs. 
Our project page is available at <a href="https://namgyukang.github.io/Physics-Informed-Gaussians/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item203'>[203]</a> <a href ="/abs/2412.12370" title="Abstract" id="2412.12370"> arXiv:2412.12370 </a> (replaced) [<a href="/pdf/2412.12370" title="Download PDF" id="pdf-2412.12370" aria-labelledby="pdf-2412.12370">pdf</a>, <a href="/format/2412.12370" title="Other formats" id="oth-2412.12370" aria-labelledby="oth-2412.12370">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Scam Detection for Ethereum Smart Contracts: Leveraging Graph Representation Learning for Secure Blockchain </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+Y">Yihong Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Z">Ze Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+X">Xinhe Xu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to ISCAIT 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Cryptography and Security (cs.CR); Distributed, Parallel, and Cluster Computing (cs.DC); Social and Information Networks (cs.SI) </div> <p class='mathjax'> Fraudulent activity on Ethereum smart contracts causes significant financial and reputational harm, making fraud detection a pressing problem. Existing monitoring methods typically rely on contract code analysis or manually extracted features, which suffer from scalability and adaptability limitations. In this study, we use graph representation learning to capture transaction patterns and detect fraudulent transactions. By transforming Ethereum transaction data into graph structures and applying machine learning models, we achieve strong classification performance. Our method addresses label imbalance through SMOTE-ENN techniques and evaluates models such as the Multi-Layer Perceptron (MLP) and Graph Convolutional Networks (GCN). Experimental results show that the MLP surpasses the GCN in this setting, with domain-specific evaluations closely aligned with real-world performance. This study provides a scalable and efficient way to improve trust and security in the Ethereum ecosystem. 
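</p> <p class='mathjax'> A minimal sketch of the described pipeline with imbalanced-learn and scikit-learn (random features stand in for the graph-derived transaction features; hyperparameters are illustrative): </p> <pre>
import numpy as np
from imblearn.combine import SMOTEENN           # SMOTE oversampling + ENN cleaning
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

rng = np.random.default_rng(0)
X = rng.normal(size=(2000, 16))                 # placeholder transaction features
y = (rng.random(2000) < 0.05).astype(int)       # heavily imbalanced fraud labels

X_tr, X_te, y_tr, y_te = train_test_split(X, y, stratify=y, random_state=0)
X_bal, y_bal = SMOTEENN(random_state=0).fit_resample(X_tr, y_tr)

clf = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=300,
                    random_state=0).fit(X_bal, y_bal)
print(clf.score(X_te, y_te))
</pre> <p class='mathjax'>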
</p> </div> </dd> <dt> <a name='item204'>[204]</a> <a href ="/abs/2412.12687" title="Abstract" id="2412.12687"> arXiv:2412.12687 </a> (replaced) [<a href="/pdf/2412.12687" title="Download PDF" id="pdf-2412.12687" aria-labelledby="pdf-2412.12687">pdf</a>, <a href="https://arxiv.org/html/2412.12687v3" title="View HTML" id="html-2412.12687" aria-labelledby="html-2412.12687" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2412.12687" title="Other formats" id="oth-2412.12687" aria-labelledby="oth-2412.12687">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Uncertainty-Aware Hybrid Inference with On-Device Small and Remote Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Oh,+S">Seungeun Oh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+J">Jinhyuk Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Park,+J">Jihong Park</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ko,+S">Seung-Woo Ko</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Quek,+T+Q+S">Tony Q. S. Quek</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+S">Seong-Lyun Kim</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 7 pages, 6 figures; to be presented at IEEE International Conference on Machine Learning for Communication and Networking (ICMLCN) 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Distributed, Parallel, and Cluster Computing (cs.DC); Information Theory (cs.IT); Networking and Internet Architecture (cs.NI); Signal Processing (eess.SP) </div> <p class='mathjax'> This paper studies a hybrid language model (HLM) architecture that integrates a small language model (SLM) operating on a mobile device with a large language model (LLM) hosted at the base station (BS) of a wireless network. The HLM token generation process follows the speculative inference principle: the SLM's vocabulary distribution is uploaded to the LLM, which either accepts or rejects it, with rejected tokens being resampled by the LLM. While this approach ensures alignment between the vocabulary distributions of the SLM and LLM, it suffers from low token throughput due to uplink transmission and the computation costs of running both language models. To address this, we propose a novel HLM structure coined Uncertainty-aware opportunistic HLM (U-HLM), wherein the SLM locally measures its output uncertainty and skips both uplink transmissions and LLM operations for tokens that are likely to be accepted. This opportunistic skipping is enabled by our empirical finding of a linear correlation between the SLM's uncertainty and the LLM's rejection probability. We analytically derive the uncertainty threshold and evaluate its expected risk of rejection. Simulations show that U-HLM reduces uplink transmissions and LLM computations by 45.93%, while achieving up to 97.54% of the LLM's inference accuracy and 2.54$\times$ faster token throughput than HLM without skipping. 
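</p> <p class='mathjax'> The skipping rule can be sketched as follows (entropy as the uncertainty measure and the fixed threshold are our assumptions; the paper derives its threshold analytically from the uncertainty-rejection correlation): </p> <pre>
import math

# Tokens whose SLM uncertainty falls below the threshold are committed
# locally, skipping both the uplink transmission and the LLM check.

def entropy(probs):
    return -sum(p * math.log(p) for p in probs if p > 0)

def generate_step(slm_probs, threshold, llm_verify):
    token = max(range(len(slm_probs)), key=lambda i: slm_probs[i])
    if entropy(slm_probs) < threshold:
        return token, "skipped uplink"       # likely to be accepted anyway
    return llm_verify(slm_probs), "verified by LLM"

# Dummy verifier standing in for the base-station LLM.
print(generate_step([0.97, 0.02, 0.01], 0.5, lambda p: 0))  # confident: skip
print(generate_step([0.40, 0.35, 0.25], 0.5, lambda p: 1))  # uncertain: verify
</pre> <p class='mathjax'>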
</p> </div> </dd> <dt> <a name='item205'>[205]</a> <a href ="/abs/2501.05661" title="Abstract" id="2501.05661"> arXiv:2501.05661 </a> (replaced) [<a href="/pdf/2501.05661" title="Download PDF" id="pdf-2501.05661" aria-labelledby="pdf-2501.05661">pdf</a>, <a href="https://arxiv.org/html/2501.05661v2" title="View HTML" id="html-2501.05661" aria-labelledby="html-2501.05661" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2501.05661" title="Other formats" id="oth-2501.05661" aria-labelledby="oth-2501.05661">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> TAMER: A Test-Time Adaptive MoE-Driven Framework for EHR Representation Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+Y">Yinghao Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+X">Xiaochen Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Allam,+A">Ahmed Allam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Krauthammer,+M">Michael Krauthammer</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 3 figures, 7 tables. Code is available at: <a href="https://github.com/yhzhu99/TAMER" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> We propose TAMER, a Test-time Adaptive MoE-driven framework for Electronic Health Record (EHR) Representation learning. TAMER introduces a framework where a Mixture-of-Experts (MoE) architecture is co-designed with Test-Time Adaptation (TTA) to jointly mitigate the intertwined challenges of patient heterogeneity and distribution shifts in EHR modeling. The MoE focuses on latent patient subgroups through domain-aware expert specialization, while TTA enables real-time adaptation to evolving health status distributions when new patient samples are introduced. Extensive experiments across four real-world EHR datasets demonstrate that TAMER consistently improves predictive performance for both mortality and readmission risk tasks when combined with diverse EHR modeling backbones. TAMER offers a promising approach for dynamic and personalized EHR-based predictions in practical clinical settings. 
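</p> <p class='mathjax'> For orientation, a generic soft-gated mixture-of-experts layer of the kind TAMER builds on looks like this (dimensions and routing are illustrative; the paper's domain-aware specialization and test-time adaptation are not shown): </p> <pre>
import torch
import torch.nn as nn

class MoE(nn.Module):
    def __init__(self, dim=32, n_experts=4):
        super().__init__()
        self.gate = nn.Linear(dim, n_experts)
        self.experts = nn.ModuleList(nn.Linear(dim, dim) for _ in range(n_experts))

    def forward(self, x):
        weights = torch.softmax(self.gate(x), dim=-1)           # soft routing
        outs = torch.stack([e(x) for e in self.experts], dim=-1)
        return (outs * weights.unsqueeze(1)).sum(-1)            # weighted mix

y = MoE()(torch.randn(8, 32))   # one embedding per patient sample
</pre> <p class='mathjax'>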
</p> </div> </dd> <dt> <a name='item206'>[206]</a> <a href ="/abs/2501.08669" title="Abstract" id="2501.08669"> arXiv:2501.08669 </a> (replaced) [<a href="/pdf/2501.08669" title="Download PDF" id="pdf-2501.08669" aria-labelledby="pdf-2501.08669">pdf</a>, <a href="https://arxiv.org/html/2501.08669v2" title="View HTML" id="html-2501.08669" aria-labelledby="html-2501.08669" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2501.08669" title="Other formats" id="oth-2501.08669" aria-labelledby="oth-2501.08669">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SPEQ: Offline Stabilization Phases for Efficient Q-Learning in High Update-To-Data Ratio Reinforcement Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Romeo,+C">Carlo Romeo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Macaluso,+G">Girolamo Macaluso</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sestini,+A">Alessandro Sestini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bagdanov,+A+D">Andrew D. Bagdanov</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> High update-to-data (UTD) ratio algorithms in reinforcement learning (RL) improve sample efficiency but incur high computational costs, limiting real-world scalability. We propose Offline Stabilization Phases for Efficient Q-Learning (SPEQ), an RL algorithm that combines low-UTD online training with periodic offline stabilization phases. During these phases, Q-functions are fine-tuned with high UTD ratios on a fixed replay buffer, reducing redundant updates on suboptimal data. This structured training schedule optimally balances computational and sample efficiency, addressing the limitations of both high and low UTD ratio approaches. We empirically demonstrate that SPEQ requires from 40% to 99% fewer gradient updates and 27% to 78% less training time compared to state-of-the-art high UTD ratio methods while maintaining or surpassing their performance on the MuJoCo continuous control benchmark. Our findings highlight the potential of periodic stabilization phases as an effective alternative to conventional training schedules, paving the way for more scalable reinforcement learning solutions in real-world applications where computational resources are constrained. 
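</p> <p class='mathjax'> The training schedule itself is simple to state (a sketch with stub functions standing in for the usual off-policy machinery; all counts are illustrative): </p> <pre>
import random

def collect_transition():
    return random.random()      # stub environment interaction

def sample(buffer):
    return random.choice(buffer)

def q_update(batch):
    pass                        # stub gradient step on the Q-functions

def train(env_steps=1000, stabilize_every=200, online_utd=1,
          offline_utd=8, offline_steps=50):
    buffer = []
    for step in range(env_steps):
        buffer.append(collect_transition())
        for _ in range(online_utd):             # cheap low-UTD online updates
            q_update(sample(buffer))
        if (step + 1) % stabilize_every == 0:
            # Offline stabilization phase: high-UTD fine-tuning of the
            # Q-functions on the fixed replay buffer, with no new data.
            for _ in range(offline_steps * offline_utd):
                q_update(sample(buffer))

train()
</pre> <p class='mathjax'>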
</p> </div> </dd> <dt> <a name='item207'>[207]</a> <a href ="/abs/2501.14009" title="Abstract" id="2501.14009"> arXiv:2501.14009 </a> (replaced) [<a href="/pdf/2501.14009" title="Download PDF" id="pdf-2501.14009" aria-labelledby="pdf-2501.14009">pdf</a>, <a href="https://arxiv.org/html/2501.14009v2" title="View HTML" id="html-2501.14009" aria-labelledby="html-2501.14009" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2501.14009" title="Other formats" id="oth-2501.14009" aria-labelledby="oth-2501.14009">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Scalable and Interpretable Verification of Image-based Neural Network Controllers for Autonomous Vehicles </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Parameshwaran,+A">Aditya Parameshwaran</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Y">Yue Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 11 pages, 5 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Systems and Control (eess.SY) </div> <p class='mathjax'> Existing formal verification methods for image-based neural network controllers in autonomous vehicles often struggle with high-dimensional inputs, computational inefficiency, and a lack of explainability. These challenges make it difficult to ensure safety and reliability, as processing high-dimensional image data is computationally intensive and neural networks are typically treated as black boxes. To address these issues, we propose SEVIN (Scalable and Explainable Verification of Image-Based Neural Network Controllers), a framework that leverages a Variational Autoencoder (VAE) to encode high-dimensional images into a lower-dimensional, explainable latent space. By annotating latent variables with corresponding control actions, we generate convex polytopes that serve as structured input spaces for verification, significantly reducing computational complexity and enhancing scalability. Integrating the VAE's decoder with the neural network controller allows for formal and robustness verification using these explainable polytopes. Our approach also incorporates robustness verification under real-world perturbations by augmenting the dataset and retraining the VAE to capture environmental variations. Experimental results demonstrate that SEVIN achieves efficient and scalable verification while providing explainable insights into controller behavior, bridging the gap between formal verification techniques and practical applications in safety-critical systems. 
</p> </div> </dd> <dt> <a name='item208'>[208]</a> <a href ="/abs/2501.14216" title="Abstract" id="2501.14216"> arXiv:2501.14216 </a> (replaced) [<a href="/pdf/2501.14216" title="Download PDF" id="pdf-2501.14216" aria-labelledby="pdf-2501.14216">pdf</a>, <a href="https://arxiv.org/html/2501.14216v3" title="View HTML" id="html-2501.14216" aria-labelledby="html-2501.14216" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2501.14216" title="Other formats" id="oth-2501.14216" aria-labelledby="oth-2501.14216">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> TFG-Flow: Training-free Guidance in Multimodal Generative Flow </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+H">Haowei Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+S">Shanda Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ye,+H">Haotian Ye</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Y">Yiming Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ermon,+S">Stefano Ermon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+Y">Yitao Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+J">Jianzhu Ma</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> ICLR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computational Engineering, Finance, and Science (cs.CE) </div> <p class='mathjax'> Given an unconditional generative model and a predictor for a target property (e.g., a classifier), the goal of training-free guidance is to generate samples with desirable target properties without additional training. As a highly efficient technique for steering generative models toward flexible outcomes, training-free guidance has gained increasing attention in diffusion models. However, existing methods only handle data in continuous spaces, while many scientific applications involve both continuous and discrete data (referred to as multimodality). Another emerging trend is the growing use of the simple and general flow matching framework in building generative foundation models, where guided generation remains under-explored. To address this, we introduce TFG-Flow, a novel training-free guidance method for multimodal generative flow. TFG-Flow addresses the curse-of-dimensionality while maintaining the property of unbiased sampling in guiding discrete variables. We validate TFG-Flow on four molecular design tasks and show that TFG-Flow has great potential in drug design by generating molecules with desired properties. 
</p> </div> </dd> <dt> <a name='item209'>[209]</a> <a href ="/abs/2501.15785" title="Abstract" id="2501.15785"> arXiv:2501.15785 </a> (replaced) [<a href="/pdf/2501.15785" title="Download PDF" id="pdf-2501.15785" aria-labelledby="pdf-2501.15785">pdf</a>, <a href="https://arxiv.org/html/2501.15785v2" title="View HTML" id="html-2501.15785" aria-labelledby="html-2501.15785" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2501.15785" title="Other formats" id="oth-2501.15785" aria-labelledby="oth-2501.15785">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Memorization and Regularization in Generative Diffusion Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Baptista,+R">Ricardo Baptista</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dasgupta,+A">Agnimitra Dasgupta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kovachki,+N+B">Nikola B. Kovachki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Oberai,+A">Assad Oberai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stuart,+A+M">Andrew M. Stuart</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 59 pages, 20 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Dynamical Systems (math.DS); Optimization and Control (math.OC) </div> <p class='mathjax'> Diffusion models have emerged as a powerful framework for generative modeling. At the heart of the methodology is score matching: learning gradients of families of log-densities for noisy versions of the data distribution at different scales. When the loss function adopted in score matching is evaluated using empirical data, rather than the population loss, the minimizer corresponds to the score of a time-dependent Gaussian mixture. However, use of this analytically tractable minimizer leads to data memorization: in both unconditioned and conditioned settings, the generative model returns the training samples. This paper contains an analysis of the dynamical mechanism underlying memorization. The analysis highlights the need for regularization to avoid reproducing the analytically tractable minimizer; and, in so doing, lays the foundations for a principled understanding of how to regularize. Numerical experiments investigate the properties of: (i) Tikhonov regularization; (ii) regularization designed to promote asymptotic consistency; and (iii) regularizations induced by under-parameterization of a neural network or by early stopping when training a neural network. These experiments are evaluated in the context of memorization, and directions for future development of regularization are highlighted. 
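To make the analytically tractable minimizer concrete: writing the training set as $\{x_n\}_{n=1}^{N}$ and assuming a Gaussian noising kernel with mean scaling $\alpha_t$ and noise level $\sigma_t$ (notation assumed here, not taken from the abstract), the minimizer of the empirical score-matching loss is the score of a time-dependent Gaussian mixture centered on the data, $$s_t^{\star}(x) \,=\, \nabla_x \log \frac{1}{N}\sum_{n=1}^{N} \mathcal{N}\big(x;\, \alpha_t x_n,\, \sigma_t^2 I\big),$$ so a sampler driven exactly by $s_t^{\star}$ transports noise back onto the training samples, which is the memorization mechanism analyzed here and which regularization must steer away from.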
</p> </div> </dd> <dt> <a name='item210'>[210]</a> <a href ="/abs/2502.06728" title="Abstract" id="2502.06728"> arXiv:2502.06728 </a> (replaced) [<a href="/pdf/2502.06728" title="Download PDF" id="pdf-2502.06728" aria-labelledby="pdf-2502.06728">pdf</a>, <a href="https://arxiv.org/html/2502.06728v2" title="View HTML" id="html-2502.06728" aria-labelledby="html-2502.06728" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.06728" title="Other formats" id="oth-2502.06728" aria-labelledby="oth-2502.06728">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FlexDeMo: Decoupled Momentum Optimization for Hybrid Sharded Data Parallel Training </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=From,+M+H">Mogens Henrik From</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nielsen,+J">Jacob Nielsen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Galke,+L">Lukas Galke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schneider-Kamp,+P">Peter Schneider-Kamp</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Training large neural network models requires extensive computational resources, often distributed across several nodes and accelerators. Recent findings suggest that it may be sufficient to only exchange the fast-moving components of the gradients, while accumulating momentum locally (Decoupled Momentum, or DeMo). However, when considering larger models that do not fit on a single accelerator, the exchange of gradient information and the integration of DeMo need to be reconsidered. Here, we propose employing a hybrid sharded data parallel training strategy, FlexDeMo, whereby nodes fully shard model parameters locally between different accelerators, while inter-node communication bandwidth requirements are reduced by synchronizing only fast-moving components instead of the full gradients. This effectively combines previous hybrid sharded strategies with the advantages of decoupled momentum. Our experimental results show that FlexDeMo is on par with hybrid sharded data parallel training employing AdamW and full gradient synchronization in terms of validation loss, demonstrating its viability. Furthermore, FlexDeMo achieves improved training speed compared to full gradient synchronization across nodes. In a bandwidth-constrained 2-node setup, FlexDeMo allows reaching desired levels of validation loss faster than hybrid sharded data parallel training with full gradient synchronization. 
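A toy two-node simulation of the communication pattern, approximating the "fast-moving" components by the top-k largest-magnitude momentum entries (the actual DeMo/FlexDeMo extraction differs; this only illustrates how synchronizing a small component reduces bandwidth):
<pre><code class="language-python">
# Illustrative sketch under assumed update rules, not the paper's algorithm.
import numpy as np

rng = np.random.default_rng(2)
dim, k, beta, lr = 1000, 50, 0.9, 0.1
params = np.zeros(dim)
momenta = [np.zeros(dim) for _ in range(2)]  # one buffer per simulated node

for step in range(100):
    # Toy per-node gradients of a simple quadratic objective.
    grads = [params - rng.standard_normal(dim) * 0.01 for _ in range(2)]
    shared = np.zeros(dim)
    for node in range(2):
        momenta[node] = beta * momenta[node] + grads[node]
        idx = np.argsort(np.abs(momenta[node]))[-k:]   # "fast" components
        fast = np.zeros(dim)
        fast[idx] = momenta[node][idx]
        momenta[node] -= fast            # slow residual stays node-local
        shared += fast / 2               # all-reduce of fast parts only
    params -= lr * shared                # every node applies the same update

print("floats communicated per step per node:", k, "instead of", dim)
</code></pre>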
</p> </div> </dd> <dt> <a name='item211'>[211]</a> <a href ="/abs/2502.06971" title="Abstract" id="2502.06971"> arXiv:2502.06971 </a> (replaced) [<a href="/pdf/2502.06971" title="Download PDF" id="pdf-2502.06971" aria-labelledby="pdf-2502.06971">pdf</a>, <a href="https://arxiv.org/html/2502.06971v3" title="View HTML" id="html-2502.06971" aria-labelledby="html-2502.06971" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.06971" title="Other formats" id="oth-2502.06971" aria-labelledby="oth-2502.06971">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> User Preference Meets Pareto-Optimality in Multi-Objective Bayesian Optimization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Ip,+J+H+S">Joshua Hang Sai Ip</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chakrabarty,+A">Ankush Chakrabarty</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mesbah,+A">Ali Mesbah</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Romeres,+D">Diego Romeres</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Incorporating user preferences into multi-objective Bayesian optimization (MOBO) allows for personalization of the optimization procedure. Preferences are often abstracted in the form of an unknown utility function, estimated through pairwise comparisons of potential outcomes. However, utility-driven MOBO methods can yield solutions that are dominated by nearby solutions, as non-dominance is not enforced. Additionally, classical MOBO commonly relies on estimating the entire Pareto-front to identify the Pareto-optimal solutions, which can be expensive and ignore user preferences. Here, we present a new method, termed preference-utility-balanced MOBO (PUB-MOBO), that allows users to disambiguate between near-Pareto candidate solutions. PUB-MOBO combines utility-based MOBO with local multi-gradient descent to refine user-preferred solutions to be near-Pareto-optimal. To this end, we propose a novel preference-dominated utility function that concurrently preserves user-preferences and dominance amongst candidate solutions. A key advantage of PUB-MOBO is that the local search is restricted to a (small) region of the Pareto-front directed by user preferences, alleviating the need to estimate the entire Pareto-front. PUB-MOBO is tested on three synthetic benchmark problems: DTLZ1, DTLZ2 and DH1, as well as on three real-world problems: Vehicle Safety, Conceptual Marine Design, and Car Side Impact. PUB-MOBO consistently outperforms state-of-the-art competitors in terms of proximity to the Pareto-front and utility regret across all the problems. 
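A hedged sketch of the intuition behind a preference-dominated utility: rank candidates by a user utility but penalize any candidate that is Pareto-dominated by another (the linear utility and additive penalty below are illustrative assumptions, not PUB-MOBO's exact functional form; objectives are minimized):
<pre><code class="language-python">
import numpy as np

def is_dominated(y, Y):
    # y is dominated if some other point is no worse in all objectives
    # and strictly better in at least one.
    return np.any(np.all(Y <= y, axis=1) & np.any(Y < y, axis=1))

def user_utility(y, weights):
    return -np.dot(weights, y)  # simple linear preference model (assumed)

rng = np.random.default_rng(3)
Y = rng.random((50, 2))               # candidate objective vectors
w = np.array([0.7, 0.3])              # elicited user preference weights

scores = []
for i, y in enumerate(Y):
    others = np.delete(Y, i, axis=0)
    penalty = 10.0 if is_dominated(y, others) else 0.0
    scores.append(user_utility(y, w) - penalty)

best = int(np.argmax(scores))
print("preferred near-Pareto candidate:", Y[best])
</code></pre>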
</p> </div> </dd> <dt> <a name='item212'>[212]</a> <a href ="/abs/2502.12617" title="Abstract" id="2502.12617"> arXiv:2502.12617 </a> (replaced) [<a href="/pdf/2502.12617" title="Download PDF" id="pdf-2502.12617" aria-labelledby="pdf-2502.12617">pdf</a>, <a href="https://arxiv.org/html/2502.12617v2" title="View HTML" id="html-2502.12617" aria-labelledby="html-2502.12617" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.12617" title="Other formats" id="oth-2502.12617" aria-labelledby="oth-2502.12617">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Graph-Enhanced Deep-Reinforcement Learning Framework for the Aircraft Landing Problem </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Maru,+V">Vatsal Maru</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 27 pages, submitted to ESWA, comments are welcome </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Systems and Control (eess.SY) </div> <p class='mathjax'> The Aircraft Landing Problem (ALP) is a challenging problem in aircraft transportation and management. The challenge is to schedule the arriving aircraft in a sequence so that the cost and delays are optimized. There are various solution approaches to solving this problem, most of which are based on operations research algorithms and meta-heuristics. Although traditional methods perform well on one factor or another, handling real-time rescheduling and computational scalability together remains an open problem. This paper presents a novel deep reinforcement learning (DRL) framework that combines graph neural networks with actor-critic architectures to address the ALP. This paper introduces three key contributions: A graph-based state representation that efficiently captures temporal and spatial relationships between aircraft, a specialized actor-critic architecture designed to handle multiple competing objectives in landing scheduling, and a runway balance strategy that ensures efficient resource utilization while maintaining safety constraints. The results show that the trained algorithm can be tested on different problem sets and is competitive with operations research algorithms. The experimental results on standard benchmark data sets demonstrate a 99.95% reduction in computational time compared to Mixed Integer Programming (MIP) and 38% higher runway throughput over First Come First Serve (FCFS) approaches. Therefore, the proposed solution is competitive with traditional approaches and achieves substantial advancements. Notably, it does not require retraining, making it particularly suitable for industrial deployment. The framework's capability to generate solutions within 1 second enables real-time rescheduling, addressing critical requirements of air traffic management. 
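An illustrative sketch, not the paper's exact construction, of what a graph-based state for landing scheduling can look like: nodes carry per-aircraft features and edges connect aircraft whose estimated arrival times fall within an assumed separation window, followed by one mean-aggregation message-passing step:
<pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(4)
n = 6
eta = np.sort(rng.uniform(0, 600, n))        # estimated arrival times (s)
speed = rng.uniform(60, 80, n)               # approach speeds (m/s)
features = np.stack([eta, speed], axis=1)    # node feature matrix

window = 90.0                                # separation window (s), assumed
adj = np.zeros((n, n))
for i in range(n):
    for j in range(n):
        if i != j and abs(eta[i] - eta[j]) <= window:
            adj[i, j] = 1.0                  # temporally coupled aircraft

# One round of mean-aggregation message passing over the state graph.
deg = np.maximum(adj.sum(axis=1, keepdims=True), 1.0)
node_embed = features + (adj @ features) / deg
print(node_embed.round(1))
</code></pre>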
</p> </div> </dd> <dt> <a name='item213'>[213]</a> <a href ="/abs/2502.14281" title="Abstract" id="2502.14281"> arXiv:2502.14281 </a> (replaced) [<a href="/pdf/2502.14281" title="Download PDF" id="pdf-2502.14281" aria-labelledby="pdf-2502.14281">pdf</a>, <a href="https://arxiv.org/html/2502.14281v2" title="View HTML" id="html-2502.14281" aria-labelledby="html-2502.14281" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.14281" title="Other formats" id="oth-2502.14281" aria-labelledby="oth-2502.14281">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Correcting Noisy Multilabel Predictions: Modeling Label Noise through Latent Space Shifts </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+W">Weipeng Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Q">Qin Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiao,+Y">Yang Xiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qiao,+C">Cheng Qiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cai,+T">Tie Cai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liao,+J">Junwei Liao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hurley,+N+J">Neil J. Hurley</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Piao,+G">Guangyuan Piao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Noise in data is inevitable in most real-world machine learning applications and can cause severe overfitting problems. Not only can data features contain noise, but labels are also prone to be noisy due to human input. In this paper, rather than noisy label learning in multiclass classifications, we instead focus on the less explored area of noisy label learning for multilabel classifications. Specifically, we investigate the post-correction of predictions generated from classifiers learned with noisy labels. The reasons are two-fold. Firstly, this approach can directly work with the trained models to save computational resources. Secondly, it could be applied on top of other noisy label correction techniques to achieve further improvements. To handle this problem, we appeal to deep generative approaches that are amenable to uncertainty estimation. Our model posits that label noise arises from a stochastic shift in the latent variable, providing a more robust and beneficial means for noisy learning. We develop both unsupervised and semi-supervised learning methods for our model. The extensive empirical study presents solid evidence that our approach consistently improves the independent models and performs better than a number of existing methods across various noisy label settings. Moreover, a comprehensive empirical analysis of the proposed method is carried out to validate its robustness, including sensitivity analysis and an ablation study, among other analyses. 
</p> </div> </dd> <dt> <a name='item214'>[214]</a> <a href ="/abs/2502.16793" title="Abstract" id="2502.16793"> arXiv:2502.16793 </a> (replaced) [<a href="/pdf/2502.16793" title="Download PDF" id="pdf-2502.16793" aria-labelledby="pdf-2502.16793">pdf</a>, <a href="https://arxiv.org/html/2502.16793v2" title="View HTML" id="html-2502.16793" aria-labelledby="html-2502.16793" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.16793" title="Other formats" id="oth-2502.16793" aria-labelledby="oth-2502.16793">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> VGFL-SA: Vertical Graph Federated Learning Structure Attack Based on Contrastive Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yang Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+B">Bin Zhou</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Graph Neural Networks (GNNs) have gained attention for their ability to learn representations from graph data. Due to privacy concerns and conflicts of interest that prevent clients from directly sharing graph data with one another, Vertical Graph Federated Learning (VGFL) frameworks have been developed. Recent studies have shown that VGFL is vulnerable to adversarial attacks that degrade performance. However, client nodes are often unlabeled in VGFL. Consequently, the existing attacks, which rely on the availability of labeling information to obtain gradients, are inherently constrained in their applicability. This limitation precludes their deployment in practical, real-world environments. To address the above problems, we propose a novel graph adversarial attack against VGFL, referred to as VGFL-SA, to degrade the performance of VGFL by modifying the local clients' structure without using labels. Specifically, VGFL-SA uses a contrastive learning method to complete the attack before the local clients are trained. VGFL-SA first accesses the graph structure and node feature information of the poisoned clients, and generates the contrastive views by node-degree-based edge augmentation and feature shuffling augmentation. Then, VGFL-SA uses the shared graph encoder to obtain the embedding of each view, and the gradients of the adjacency matrices are obtained by the contrastive function. Finally, perturbed edges are generated using gradient modification rules. We validated the performance of VGFL-SA by performing a node classification task on real-world datasets, and the results show that VGFL-SA achieves good attack effectiveness and transferability. 
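A hedged sketch of the two contrastive views described above, with illustrative perturbation probabilities (the paper's exact augmentation rules may differ):
<pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(5)
n = 8
A = (rng.random((n, n)) > 0.7).astype(float)
A = np.triu(A, 1); A = A + A.T                 # undirected toy adjacency
X = rng.standard_normal((n, 3))                # node features

def degree_based_edge_aug(A, base_p=0.3):
    deg = A.sum(axis=1)
    A_aug = A.copy()
    for i, j in zip(*np.nonzero(np.triu(A, 1))):
        # Lower-degree endpoints get a higher drop probability (assumed rule).
        p = base_p * 2.0 / (1.0 + min(deg[i], deg[j]))
        if rng.random() < min(p, 1.0):
            A_aug[i, j] = A_aug[j, i] = 0.0    # drop the edge
    return A_aug

def feature_shuffle_aug(X):
    perm = rng.permutation(X.shape[0])
    return X[perm]                             # shuffle feature rows across nodes

view1 = (degree_based_edge_aug(A), X)
view2 = (A, feature_shuffle_aug(X))
print("edges kept in view 1:", int(view1[0].sum() / 2))
</code></pre>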
</p> </div> </dd> <dt> <a name='item215'>[215]</a> <a href ="/abs/2502.17371" title="Abstract" id="2502.17371"> arXiv:2502.17371 </a> (replaced) [<a href="/pdf/2502.17371" title="Download PDF" id="pdf-2502.17371" aria-labelledby="pdf-2502.17371">pdf</a>, <a href="https://arxiv.org/html/2502.17371v3" title="View HTML" id="html-2502.17371" aria-labelledby="html-2502.17371" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.17371" title="Other formats" id="oth-2502.17371" aria-labelledby="oth-2502.17371">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Sustainable Greenhouse Microclimate Modeling: A Comparative Analysis of Recurrent and Graph Neural Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Seri,+E">Emiliano Seri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Petitta,+M">Marcello Petitta</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cornaro,+C">Cristina Cornaro</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Applications (stat.AP) </div> <p class='mathjax'> The integration of photovoltaic (PV) systems into greenhouses not only optimizes land use but also enhances sustainable agricultural practices by enabling dual benefits of food production and renewable energy generation. However, accurate prediction of internal environmental conditions is crucial to ensure optimal crop growth while maximizing energy production. This study introduces a novel application of Spatio-Temporal Graph Neural Networks (STGNNs) to greenhouse microclimate modeling, comparing their performance with traditional Recurrent Neural Networks (RNNs). While RNNs excel at temporal pattern recognition, they cannot explicitly model the directional relationships between environmental variables. Our STGNN approach addresses this limitation by representing these relationships as directed graphs, enabling the model to capture both environmental dependencies and their directionality. Using high-frequency data collected at 15-minute intervals from a greenhouse in Volos, Greece, we demonstrate that RNNs achieve exceptional accuracy in winter conditions ($R^2 = 0.985$) but show limitations during summer cooling system operation. Though STGNNs currently show lower performance (winter $R^2 = 0.947$), their architecture offers greater potential for integrating additional variables such as PV generation and crop growth indicators. 
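A toy illustration, not the paper's model, of the core STGNN idea of propagating information along directed dependencies among microclimate variables (the variable set and edges below are assumed):
<pre><code class="language-python">
import numpy as np

variables = ["outside_temp", "solar_rad", "inside_temp", "humidity"]
# adj[i, j] = 1 means variable i influences variable j (directed edge).
adj = np.zeros((4, 4))
adj[0, 2] = adj[1, 2] = adj[2, 3] = 1.0

rng = np.random.default_rng(6)
x = rng.standard_normal((4, 16))     # 4 variables, 16 time steps

# One directed message-passing step: each variable aggregates its influencers.
in_deg = np.maximum(adj.sum(axis=0, keepdims=True).T, 1.0)
h = x + (adj.T @ x) / in_deg
print("inside_temp embedding now mixes its drivers:", h[2, :4].round(2))
</code></pre>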
</p> </div> </dd> <dt> <a name='item216'>[216]</a> <a href ="/abs/2502.20032" title="Abstract" id="2502.20032"> arXiv:2502.20032 </a> (replaced) [<a href="/pdf/2502.20032" title="Download PDF" id="pdf-2502.20032" aria-labelledby="pdf-2502.20032">pdf</a>, <a href="https://arxiv.org/html/2502.20032v2" title="View HTML" id="html-2502.20032" aria-labelledby="html-2502.20032" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.20032" title="Other formats" id="oth-2502.20032" aria-labelledby="oth-2502.20032">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Order-Robust Class Incremental Learning: Graph-Driven Dynamic Similarity Grouping </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lai,+G">Guannan Lai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yujie Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+X">Xiangkun Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+J">Junbo Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+T">Tianrui Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+X">Xin Yang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to CVPR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Class Incremental Learning (CIL) aims to enable models to learn new classes sequentially while retaining knowledge of previous ones. Although current methods have alleviated catastrophic forgetting (CF), recent studies highlight that the performance of CIL models is highly sensitive to the order of class arrival, particularly when sequentially introduced classes exhibit high inter-class similarity. To address this critical yet understudied challenge of class order sensitivity, we first extend existing CIL frameworks through theoretical analysis, proving that grouping classes with lower pairwise similarity during incremental phases significantly improves model robustness to order variations. Building on this insight, we propose Graph-Driven Dynamic Similarity Grouping (GDDSG), a novel method that employs graph coloring algorithms to dynamically partition classes into similarity-constrained groups. Each group trains an isolated CIL sub-model and constructs meta-features for class group identification. Experimental results demonstrate that our method effectively addresses the issue of class order sensitivity while achieving optimal performance in both model accuracy and anti-forgetting capability. Our code is available at <a href="https://github.com/AIGNLAI/GDDSG" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
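A minimal sketch of similarity-constrained grouping via greedy graph coloring, with toy class prototypes and an assumed similarity threshold; each color class then contains only mutually dissimilar classes:
<pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(7)
protos = rng.standard_normal((10, 32))                 # toy class prototypes
protos /= np.linalg.norm(protos, axis=1, keepdims=True)
sim = protos @ protos.T
threshold = 0.2                                        # assumed

n = sim.shape[0]
conflict = (sim > threshold) & ~np.eye(n, dtype=bool)  # "too similar" edges

color = {}
for node in range(n):                                  # greedy coloring
    used = {color[m] for m in range(n) if conflict[node, m] and m in color}
    c = 0
    while c in used:
        c += 1
    color[node] = c

groups = {}
for node, c in color.items():
    groups.setdefault(c, []).append(node)
print("similarity-constrained class groups:", groups)
</code></pre>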
</p> </div> </dd> <dt> <a name='item217'>[217]</a> <a href ="/abs/2502.20963" title="Abstract" id="2502.20963"> arXiv:2502.20963 </a> (replaced) [<a href="/pdf/2502.20963" title="Download PDF" id="pdf-2502.20963" aria-labelledby="pdf-2502.20963">pdf</a>, <a href="/format/2502.20963" title="Other formats" id="oth-2502.20963" aria-labelledby="oth-2502.20963">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Retrieval Augmented Generation for Topic Modeling in Organizational Research: An Introduction with Empirical Demonstration </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Spielberger,+G">Gerion Spielberger</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Artinger,+F+M">Florian M. Artinger</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Reb,+J">Jochen Reb</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kerschreiter,+R">Rudolf Kerschreiter</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 30 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); General Economics (econ.GN) </div> <p class='mathjax'> Analyzing textual data is the cornerstone of qualitative research. While traditional methods such as grounded theory and content analysis are widely used, they are labor-intensive and time-consuming. Topic modeling offers an automated complement. Yet, existing approaches, including LLM-based topic modeling, still struggle with issues such as high data preprocessing requirements, interpretability, and reliability. This paper introduces Agentic Retrieval-Augmented Generation (Agentic RAG) as a method for topic modeling with LLMs. It integrates three key components: (1) retrieval, enabling automatized access to external data beyond an LLM's pre-trained knowledge; (2) generation, leveraging LLM capabilities for text synthesis; and (3) agent-driven learning, iteratively refining retrieval and query formulation processes. To empirically validate Agentic RAG for topic modeling, we reanalyze a Twitter/X dataset, previously examined by Mu et al. (2024a). Our findings demonstrate that the approach is more efficient and interpretable, and at the same time achieves higher reliability and validity than both the standard machine learning approach and LLM prompting for topic modeling. These results highlight Agentic RAG's ability to generate semantically relevant and reproducible topics, positioning it as a robust, scalable, and transparent alternative for AI-driven qualitative research in leadership, managerial, and organizational research. </p> </div> </dd> <dt> <a name='item218'>[218]</a> <a href ="/abs/2503.01843" title="Abstract" id="2503.01843"> arXiv:2503.01843 </a> (replaced) [<a href="/pdf/2503.01843" title="Download PDF" id="pdf-2503.01843" aria-labelledby="pdf-2503.01843">pdf</a>, <a href="/format/2503.01843" title="Other formats" id="oth-2503.01843" aria-labelledby="oth-2503.01843">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> When Can You Get Away with Low Memory Adam? 
</div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kalra,+D+S">Dayal Singh Kalra</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kirchenbauer,+J">John Kirchenbauer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barkeshli,+M">Maissam Barkeshli</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Goldstein,+T">Tom Goldstein</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Acknowledgement updates and minor writing edits </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Disordered Systems and Neural Networks (cond-mat.dis-nn); Machine Learning (stat.ML) </div> <p class='mathjax'> Adam is the go-to optimizer for training modern machine learning models, but it requires additional memory to maintain the moving averages of the gradients and their squares. While various low-memory optimizers have been proposed that sometimes match the performance of Adam, their lack of reliability has left Adam as the default choice. In this work, we apply a simple layer-wise Signal-to-Noise Ratio (SNR) analysis to quantify when second-moment tensors can be effectively replaced by their means across different dimensions. Our SNR analysis reveals how architecture, training hyperparameters, and dataset properties impact compressibility along Adam's trajectory, naturally leading to $\textit{SlimAdam}$, a memory-efficient Adam variant. $\textit{SlimAdam}$ compresses the second moments along dimensions with high SNR when feasible, and leaves them uncompressed when compression would be detrimental. Through experiments across a diverse set of architectures and training scenarios, we show that $\textit{SlimAdam}$ matches Adam's performance and stability while saving up to $98\%$ of total second moments. Code for $\textit{SlimAdam}$ is available at <a href="https://github.com/dayal-kalra/low-memory-adam" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item219'>[219]</a> <a href ="/abs/2503.01919" title="Abstract" id="2503.01919"> arXiv:2503.01919 </a> (replaced) [<a href="/pdf/2503.01919" title="Download PDF" id="pdf-2503.01919" aria-labelledby="pdf-2503.01919">pdf</a>, <a href="https://arxiv.org/html/2503.01919v2" title="View HTML" id="html-2503.01919" aria-labelledby="html-2503.01919" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.01919" title="Other formats" id="oth-2503.01919" aria-labelledby="oth-2503.01919">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Reinforcement learning with combinatorial actions for coupled restless bandits </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+L">Lily Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wilder,+B">Bryan Wilder</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Khalil,+E+B">Elias B. Khalil</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tambe,+M">Milind Tambe</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> To appear at ICLR 2025. 
Code at <a href="https://github.com/lily-x/combinatorial-rmab" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> The Thirteenth International Conference on Learning Representations (ICLR 2025) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Reinforcement learning (RL) has increasingly been applied to solve real-world planning problems, with progress in handling large state spaces and time horizons. However, a key bottleneck in many domains is that RL methods cannot accommodate large, combinatorially structured action spaces. In such settings, even representing the set of feasible actions at a single step may require a complex discrete optimization formulation. We leverage recent advances in embedding trained neural networks into optimization problems to propose SEQUOIA, an RL algorithm that directly optimizes for long-term reward over the feasible action space. Our approach embeds a Q-network into a mixed-integer program to select a combinatorial action in each timestep. Here, we focus on planning over restless bandits, a class of planning problems which capture many real-world examples of sequential decision making. We introduce coRMAB, a broader class of restless bandits with combinatorial actions that cannot be decoupled across the arms of the restless bandit, requiring direct solving over the joint, exponentially large action space. We empirically validate SEQUOIA on four novel restless bandit problems with combinatorial constraints: multiple interventions, path constraints, bipartite matching, and capacity constraints. Our approach significantly outperforms existing methods -- which cannot address sequential planning and combinatorial selection simultaneously -- by an average of 24.8\% on these difficult instances. </p> </div> </dd> <dt> <a name='item220'>[220]</a> <a href ="/abs/2503.03113" title="Abstract" id="2503.03113"> arXiv:2503.03113 </a> (replaced) [<a href="/pdf/2503.03113" title="Download PDF" id="pdf-2503.03113" aria-labelledby="pdf-2503.03113">pdf</a>, <a href="https://arxiv.org/html/2503.03113v2" title="View HTML" id="html-2503.03113" aria-labelledby="html-2503.03113" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.03113" title="Other formats" id="oth-2503.03113" aria-labelledby="oth-2503.03113">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Predicting Space Tourism Demand Using Explainable AI </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pham,+T">Tan-Hanh Pham</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bi,+J">Jingchen Bi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mesa-Arango,+R">Rodrigo Mesa-Arango</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+K">Kim-Doang Nguyen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 15 pages </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Comprehensive forecasts of space tourism demand are crucial for businesses to optimize strategies and customer experiences in this burgeoning industry. 
Traditional methods struggle to capture the complex factors influencing an individual's decision to travel to space. In this paper, we propose an explainable and trustworthy artificial intelligence framework to address the challenge of predicting space tourism demand by following the National Institute of Standards and Technology guidelines. We develop a novel machine learning network, called SpaceNet, capable of learning wide-ranging dependencies in data and allowing us to analyze the relationships between various factors such as age, income, and risk tolerance. We investigate space travel demand in the US, categorizing it into four types: no travel, moon travel, suborbital, and orbital travel. To this end, we collected 1,860 data points from respondents of different ages across many US states and cities and conducted our experiments on this data. In our experiments, SpaceNet achieves an average ROC-AUC of 0.82 $\pm$ 0.088, indicating strong classification performance. Our investigation demonstrated that travel price, age, annual income, gender, and fatality probability are important features in deciding whether a person wants to travel or not. Beyond demand forecasting, we use explainable AI to interpret an individual's travel-type decisions, offering insights into the factors driving interest in space travel, which is not possible with traditional classification methods. This knowledge enables businesses to tailor marketing strategies and optimize service offerings in this rapidly evolving market. To the best of our knowledge, this is the first work to implement an explainable and interpretable AI framework for investigating the factors influencing space tourism. </p> </div> </dd> <dt> <a name='item221'>[221]</a> <a href ="/abs/2503.03150" title="Abstract" id="2503.03150"> arXiv:2503.03150 </a> (replaced) [<a href="/pdf/2503.03150" title="Download PDF" id="pdf-2503.03150" aria-labelledby="pdf-2503.03150">pdf</a>, <a href="https://arxiv.org/html/2503.03150v2" title="View HTML" id="html-2503.03150" aria-labelledby="html-2503.03150" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.03150" title="Other formats" id="oth-2503.03150" aria-labelledby="oth-2503.03150">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Position: Model Collapse Does Not Mean What You Think </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Schaeffer,+R">Rylan Schaeffer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kazdan,+J">Joshua Kazdan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Arulandu,+A+C">Alvan Caleb Arulandu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Koyejo,+S">Sanmi Koyejo</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computers and Society (cs.CY) </div> <p class='mathjax'> The proliferation of AI-generated content online has fueled concerns over \emph{model collapse}, a degradation in future generative models' performance when trained on synthetic data generated by earlier models. Industry leaders, premier research journals and popular science publications alike have prophesied catastrophic societal consequences stemming from model collapse. In this position piece, we contend this widespread narrative fundamentally misunderstands the scientific evidence. 
We highlight that research on model collapse actually encompasses eight distinct and at times conflicting definitions of model collapse, and argue that inconsistent terminology within and between papers has hindered building a comprehensive understanding of model collapse. To assess how significantly different interpretations of model collapse threaten future generative models, we posit what we believe are realistic conditions for studying model collapse and then conduct a rigorous assessment of the literature's methodologies through this lens. While we leave room for reasonable disagreement, our analysis of research studies, weighted by how faithfully each study matches real-world conditions, leads us to conclude that certain predicted claims of model collapse rely on assumptions and conditions that poorly match real-world conditions, and in fact several prominent collapse scenarios are readily avoidable. Altogether, this position paper argues that model collapse has been warped from a nuanced multifaceted consideration into an oversimplified threat, and that the evidence suggests specific harms more likely under society's current trajectory have received disproportionately less attention. </p> </div> </dd> <dt> <a name='item222'>[222]</a> <a href ="/abs/2503.09101" title="Abstract" id="2503.09101"> arXiv:2503.09101 </a> (replaced) [<a href="/pdf/2503.09101" title="Download PDF" id="pdf-2503.09101" aria-labelledby="pdf-2503.09101">pdf</a>, <a href="https://arxiv.org/html/2503.09101v2" title="View HTML" id="html-2503.09101" aria-labelledby="html-2503.09101" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.09101" title="Other formats" id="oth-2503.09101" aria-labelledby="oth-2503.09101">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The Shape of Attraction in UMAP: Exploring the Embedding Forces in Dimensionality Reduction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Islam,+M+T">Mohammad Tariqul Islam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fleischer,+J+W">Jason W. Fleischer</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages + appendix </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> Uniform manifold approximation and projection (UMAP) is among the most popular neighbor embedding methods. The method relies on attractive and repulsive forces among high-dimensional data points to obtain a low-dimensional embedding. In this paper, we analyze the forces to reveal their effects on cluster formations and visualization. Repulsion emphasizes differences, controlling cluster boundaries and inter-cluster distance. Attraction is more subtle, as attractive tension between points can manifest simultaneously as attraction and repulsion in the lower-dimensional mapping. This explains the need for learning rate annealing and motivates the different treatments between attractive and repulsive terms. Moreover, by modifying attraction, we improve the consistency of cluster formation under random initialization. Overall, our analysis makes UMAP and similar embedding methods more interpretable, more robust, and more accurate. 
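A simplified sketch of the two forces under discussion (UMAP's true gradients involve the fitted $a$, $b$ curve parameters; the force forms below are illustrative): attraction pulls embedded neighbors together, while repulsion pushes sampled non-neighbors apart.
<pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(8)
Y = rng.standard_normal((20, 2))                    # low-dimensional embedding
neighbors = [(i, (i + 1) % 20) for i in range(20)]  # toy kNN graph (a ring)
lr = 0.05

for epoch in range(200):
    for i, j in neighbors:
        d = Y[i] - Y[j]
        Y[i] -= lr * d                      # attractive update along an edge
        Y[j] += lr * d
        k = rng.integers(20)                # negative (non-neighbor) sample
        if k != i:
            d = Y[i] - Y[k]
            denom = (d @ d) + 1e-3
            Y[i] += lr * d / denom          # repulsive update, decays with distance

print("embedding spread:", Y.std(axis=0).round(2))
</code></pre>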
</p> </div> </dd> <dt> <a name='item223'>[223]</a> <a href ="/abs/2503.09315" title="Abstract" id="2503.09315"> arXiv:2503.09315 </a> (replaced) [<a href="/pdf/2503.09315" title="Download PDF" id="pdf-2503.09315" aria-labelledby="pdf-2503.09315">pdf</a>, <a href="https://arxiv.org/html/2503.09315v3" title="View HTML" id="html-2503.09315" aria-labelledby="html-2503.09315" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.09315" title="Other formats" id="oth-2503.09315" aria-labelledby="oth-2503.09315">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ShuffleGate: An Efficient and Self-Polarizing Feature Selection Method for Large-Scale Deep Models in Industry </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+Y">Yihong Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chu,+C">Chen Chu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+F">Fan Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+F">Fei Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+Y">Yu Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+R">Ruiduan Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Z">Zhihao Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Deep models in industrial applications rely on thousands of features for accurate predictions, such as deep recommendation systems. While new features are introduced to capture evolving user behavior, outdated or redundant features often remain, significantly increasing storage and computational costs. To address this issue, feature selection methods are widely adopted to identify and remove less important features. However, existing approaches face two major challenges: (1) they often require complex hyperparameter (Hp) tuning, making them difficult to employ in practice, and (2) they fail to produce well-separated feature importance scores, which complicates straightforward feature removal. Moreover, the impact of removing unimportant features can only be evaluated through retraining the model, a time-consuming and resource-intensive process that severely hinders efficient feature selection. <br>To solve these challenges, we propose a novel feature selection approach, ShuffleGate. In particular, it shuffles all feature values across instances simultaneously and uses a gating mechanism that allows the model to dynamically learn the weights for combining the original and shuffled inputs. Notably, it can generate well-separated feature importance scores and estimate the performance without retraining the model, while introducing only a single Hp. Experiments on four public datasets show that our approach outperforms state-of-the-art methods in feature selection for model retraining. Moreover, it has been successfully integrated into the daily iteration of Bilibili's search models across various scenarios, where it significantly reduces feature set size (up to 60%+) and computational resource usage (up to 20%+), while maintaining comparable performance. 
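A compact sketch of the gating mechanism, with an assumed linear task head and penalty weight: each feature's values are shuffled across the batch, a learnable per-feature gate mixes original and shuffled inputs, and the task loss plus a gate penalty is minimized. Informative features must keep their gate high to preserve accuracy, which is what separates the importance scores.
<pre><code class="language-python">
import torch

torch.manual_seed(0)
n, d = 512, 10
X = torch.randn(n, d)
y = (X[:, 0] + 2 * X[:, 1]).unsqueeze(1)   # only features 0 and 1 matter

gate_logits = torch.zeros(d, requires_grad=True)
model = torch.nn.Linear(d, 1)
opt = torch.optim.Adam([gate_logits, *model.parameters()], lr=0.05)

for step in range(500):
    g = torch.sigmoid(gate_logits)
    # Shuffle every feature column across instances simultaneously.
    X_shuf = torch.stack([X[torch.randperm(n), j] for j in range(d)], dim=1)
    X_mix = g * X + (1 - g) * X_shuf       # gated mix of real and shuffled
    loss = torch.nn.functional.mse_loss(model(X_mix), y) + 0.1 * g.sum()
    opt.zero_grad(); loss.backward(); opt.step()

print("gates (feature importance):",
      torch.sigmoid(gate_logits).detach().round(decimals=2))
</code></pre>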
</p> </div> </dd> <dt> <a name='item224'>[224]</a> <a href ="/abs/2503.09573" title="Abstract" id="2503.09573"> arXiv:2503.09573 </a> (replaced) [<a href="/pdf/2503.09573" title="Download PDF" id="pdf-2503.09573" aria-labelledby="pdf-2503.09573">pdf</a>, <a href="/format/2503.09573" title="Other formats" id="oth-2503.09573" aria-labelledby="oth-2503.09573">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Block Diffusion: Interpolating Between Autoregressive and Diffusion Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Arriola,+M">Marianne Arriola</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gokaslan,+A">Aaron Gokaslan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chiu,+J+T">Justin T Chiu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Z">Zhihan Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qi,+Z">Zhixuan Qi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Han,+J">Jiaqi Han</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sahoo,+S+S">Subham Sekhar Sahoo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kuleshov,+V">Volodymyr Kuleshov</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> ICLR 2025 Oral. We provide the code at <a href="https://github.com/kuleshov-group/bd3lms" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Diffusion language models offer unique benefits over autoregressive models due to their potential for parallelized generation and controllability, yet they lag in likelihood modeling and are limited to fixed-length generation. In this work, we introduce a class of block diffusion language models that interpolate between discrete denoising diffusion and autoregressive models. Block diffusion overcomes key limitations of both approaches by supporting flexible-length generation and improving inference efficiency with KV caching and parallel token sampling. We propose a recipe for building effective block diffusion models that includes an efficient training algorithm, estimators of gradient variance, and data-driven noise schedules to minimize the variance. Block diffusion sets a new state-of-the-art performance among diffusion models on language modeling benchmarks and enables generation of arbitrary-length sequences. 
We provide the code, along with the model weights and blog post on the project page: <a href="https://m-arriola.com/bd3lms/" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item225'>[225]</a> <a href ="/abs/2503.09657" title="Abstract" id="2503.09657"> arXiv:2503.09657 </a> (replaced) [<a href="/pdf/2503.09657" title="Download PDF" id="pdf-2503.09657" aria-labelledby="pdf-2503.09657">pdf</a>, <a href="/format/2503.09657" title="Other formats" id="oth-2503.09657" aria-labelledby="oth-2503.09657">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Týr-the-Pruner: Unlocking Accurate 50% Structural Pruning for LLMs via Global Sparsity Distribution Optimization </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+G">Guanchen Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+Y">Yixing Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Z">Zeping Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+J">Ji Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yin,+X">Xuanwu Yin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+D">Dong Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barsoum,+E">Emad Barsoum</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Structural pruning enhances hardware-agnostic inference efficiency for large language models (LLMs) but often struggles to maintain performance. Local pruning performs efficient layer-by-layer compression but ignores global topology. Global pruning has the potential to find the optimal solution, although it is resource-intensive. However, existing methods tend to rank structural saliency uniformly, ignoring inter-structure dependencies and failing to achieve end-to-end optimization. To address these limitations, we propose Týr-the-Pruner, an efficient end-to-end search-based global structural pruning framework. This framework constructs a supernet by repeatedly applying local pruning across a range of sparsity ratios to each layer in an LLM, with the core goal of determining the optimal sparsity distribution under a target overall sparsity ratio. Concretely, we introduce an effective local pruning and an expectation error accumulation approach to improve supernet construction. Furthermore, we employ an iterative prune-and-search strategy with coarse-to-fine sparsity granularity to ensure efficient search convergence. Experimental results show that Týr-the-Pruner achieves state-of-the-art structural pruning, retaining 97% of the dense model's performance while removing a challenging 50% of Llama-3.1-70B's parameters. 
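A toy illustration of searching for a per-layer sparsity distribution under a global budget, using assumed per-layer error curves and a greedy search in place of the paper's supernet-based prune-and-search:
<pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(9)
layers = 8
params = rng.integers(10, 100, layers).astype(float)   # per-layer param counts
ratios = np.arange(0.0, 0.95, 0.05)                    # candidate sparsities
sens = rng.uniform(0.5, 3.0, layers)                   # per-layer sensitivity
err = sens[:, None] * ratios[None, :] ** 2             # assumed error curves

choice = np.zeros(layers, dtype=int)

def overall(choice):
    # Parameter-weighted average sparsity across layers.
    return np.dot(params, ratios[choice]) / params.sum()

while overall(choice) < 0.5:                           # 50% global target
    # Raise sparsity one notch in the layer where it hurts least.
    deltas = [err[l, choice[l] + 1] - err[l, choice[l]]
              if choice[l] + 1 < len(ratios) else np.inf
              for l in range(layers)]
    choice[int(np.argmin(deltas))] += 1

print("per-layer sparsity:", ratios[choice].round(2),
      "overall:", round(overall(choice), 3))
</code></pre>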
</p> </div> </dd> <dt> <a name='item226'>[226]</a> <a href ="/abs/2503.10253" title="Abstract" id="2503.10253"> arXiv:2503.10253 </a> (replaced) [<a href="/pdf/2503.10253" title="Download PDF" id="pdf-2503.10253" aria-labelledby="pdf-2503.10253">pdf</a>, <a href="https://arxiv.org/html/2503.10253v2" title="View HTML" id="html-2503.10253" aria-labelledby="html-2503.10253" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.10253" title="Other formats" id="oth-2503.10253" aria-labelledby="oth-2503.10253">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PIMRL: Physics-Informed Multi-Scale Recurrent Learning for Spatiotemporal Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wan,+H">Han Wan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+Q">Qi Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mi,+Y">Yuan Mi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+H">Hao Sun</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Simulation of spatiotemporal systems governed by partial differential equations is widely applied in fields such as biology, chemistry, aerospace dynamics, and meteorology. Traditional numerical methods incur high computational costs due to the requirement of small time steps for accurate predictions. While machine learning has reduced these costs, long-term predictions remain challenged by error accumulation, particularly in scenarios with insufficient data or varying time scales, where stability and accuracy are compromised. Existing methods often neglect the effective utilization of multi-scale data, leading to suboptimal robustness in predictions. To address these issues, we propose a novel multi-scale learning framework, namely, the Physics-Informed Multi-Scale Recurrent Learning (PIMRL), to effectively leverage multi-scale data for spatiotemporal dynamics prediction. The PIMRL framework comprises two modules: the micro-scale module embeds physical knowledge into neural networks via pretraining, and the macro-scale module adopts a data-driven approach to learn the temporal evolution of physics in the latent space. Experimental results demonstrate that the PIMRL framework consistently achieves state-of-the-art performance across five benchmark datasets ranging from one to three dimensions, showing average improvements of over 9\% in both RMSE and MAE evaluation metrics, with maximum enhancements reaching up to 80%. 
</p> </div> </dd> <dt> <a name='item227'>[227]</a> <a href ="/abs/2503.11709" title="Abstract" id="2503.11709"> arXiv:2503.11709 </a> (replaced) [<a href="/pdf/2503.11709" title="Download PDF" id="pdf-2503.11709" aria-labelledby="pdf-2503.11709">pdf</a>, <a href="https://arxiv.org/html/2503.11709v2" title="View HTML" id="html-2503.11709" aria-labelledby="html-2503.11709" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.11709" title="Other formats" id="oth-2503.11709" aria-labelledby="oth-2503.11709">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Conformal Prediction and Human Decision Making </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hullman,+J">Jessica Hullman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Y">Yifan Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+D">Dawei Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guo,+Z">Ziyang Guo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gelman,+A">Andrew Gelman</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Machine Learning (stat.ML) </div> <p class='mathjax'> Methods to quantify uncertainty in predictions from arbitrary models are in demand in high-stakes domains like medicine and finance. Conformal prediction has emerged as a popular method for producing a set of predictions with specified average coverage, in place of a single prediction and confidence value. However, the value of conformal prediction sets to assist human decisions remains elusive due to the murky relationship between coverage guarantees and decision makers' goals and strategies. How should we think about conformal prediction sets as a form of decision support? We outline a decision theoretic framework for evaluating predictive uncertainty as informative signals, then contrast what can be said within this framework about idealized use of calibrated probabilities versus conformal prediction sets. Informed by prior empirical results and theories of human decisions under uncertainty, we formalize a set of possible strategies by which a decision maker might use a prediction set. We identify ways in which conformal prediction sets and posthoc predictive uncertainty quantification more broadly are in tension with common goals and needs in human-AI decision making. We give recommendations for future research in predictive uncertainty quantification to support human decision makers. 
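For readers unfamiliar with the construction under discussion, a self-contained sketch of split conformal prediction for classification (the toy classifier is an assumption; the marginal coverage guarantee holds regardless of model quality under exchangeability):
<pre><code class="language-python">
import numpy as np

rng = np.random.default_rng(10)
n_cal, n_test, k = 500, 200, 5
alpha = 0.1                                  # target 90% coverage

def predict_proba(n):
    # Toy stochastic classifier with signal on the true class.
    logits = rng.standard_normal((n, k))
    true = rng.integers(k, size=n)
    logits[np.arange(n), true] += 2.0
    p = np.exp(logits); p /= p.sum(1, keepdims=True)
    return p, true

# Calibration: nonconformity score = 1 - probability of the true label.
p_cal, y_cal = predict_proba(n_cal)
scores = 1.0 - p_cal[np.arange(n_cal), y_cal]
q = np.quantile(scores, np.ceil((n_cal + 1) * (1 - alpha)) / n_cal)

# Prediction sets: every label whose probability clears the threshold.
p_test, y_test = predict_proba(n_test)
sets = p_test >= 1.0 - q
coverage = sets[np.arange(n_test), y_test].mean()
print("empirical coverage:", round(coverage, 3),
      "avg set size:", round(sets.sum(1).mean(), 2))
</code></pre>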
</p> </div> </dd> <dt> <a name='item228'>[228]</a> <a href ="/abs/2503.11737" title="Abstract" id="2503.11737"> arXiv:2503.11737 </a> (replaced) [<a href="/pdf/2503.11737" title="Download PDF" id="pdf-2503.11737" aria-labelledby="pdf-2503.11737">pdf</a>, <a href="https://arxiv.org/html/2503.11737v2" title="View HTML" id="html-2503.11737" aria-labelledby="html-2503.11737" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.11737" title="Other formats" id="oth-2503.11737" aria-labelledby="oth-2503.11737">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Multi-View Node Pruning for Accurate Graph Representation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Park,+J">Jiseong Park</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+H">Hanjin Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+S">Seojin Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Choi,+J">Jueun Choi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Jiseong Park and Hanjin Kim are co-first authors for this work </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Graph pooling, which compresses a whole graph into a smaller coarsened graph, is an essential component of graph representation learning. To efficiently compress a given graph, graph pooling methods often drop nodes using attention-based scoring trained with the task loss. However, this often results in simply removing nodes with lower degrees without consideration of their feature-level relevance to the given task. To fix this problem, we propose Multi-View Pruning (MVP), a graph pruning method based on a multi-view framework and reconstruction loss. Given a graph, MVP first constructs multiple graphs for different views either by utilizing the predefined modalities or by randomly partitioning the input features, to consider the importance of each node in diverse perspectives. Then, it learns the score for each node by considering both the reconstruction and the task loss. MVP can be incorporated into any hierarchical pooling framework to score the nodes. We validate MVP on multiple benchmark datasets by coupling it with two graph pooling methods, and show that it significantly improves the performance of the base graph pooling method, outperforming all baselines. Further analysis shows that both the encoding of multiple views and the consideration of reconstruction loss are the key to the success of MVP, and that it indeed identifies nodes that are less important according to domain knowledge. 
</p> </div> </dd> <dt> <a name='item229'>[229]</a> <a href ="/abs/2503.11741" title="Abstract" id="2503.11741"> arXiv:2503.11741 </a> (replaced) [<a href="/pdf/2503.11741" title="Download PDF" id="pdf-2503.11741" aria-labelledby="pdf-2503.11741">pdf</a>, <a href="https://arxiv.org/html/2503.11741v2" title="View HTML" id="html-2503.11741" aria-labelledby="html-2503.11741" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.11741" title="Other formats" id="oth-2503.11741" aria-labelledby="oth-2503.11741">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> BioMamba: Leveraging Spectro-Temporal Embedding in Bidirectional Mamba for Enhanced Biosignal Classification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Qian,+J">Jian Qian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Goh,+T+L">Teck Lun Goh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+B">Bingyu Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhu,+C">Chengyao Zhu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wan,+B">Biao Wan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Guan,+Y">Yawen Guan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chiang,+P+Y">Patrick Yin Chiang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Application-> Biological signals </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Biological signals, such as electroencephalograms (EEGs) and electrocardiograms (ECGs), play a pivotal role in numerous clinical practices, such as diagnosing brain disorders and cardiac arrhythmias. Existing methods for biosignal classification rely on Attention-based frameworks with dense Feed Forward layers, which lead to inefficient learning, high computational overhead, and suboptimal performance. In this work, we introduce BioMamba, a Spectro-Temporal Embedding strategy applied to the Bidirectional Mamba framework with Sparse Feed Forward layers to enable effective learning of biosignal sequences. By integrating these three key components, BioMamba effectively addresses the limitations of existing methods. Extensive experiments demonstrate that BioMamba significantly outperforms state-of-the-art methods with marked improvements in classification performance. The advantages of the proposed BioMamba include (1) Reliability: BioMamba consistently delivers robust results, confirmed across six evaluation metrics. (2) Efficiency: assessing both model and training efficiency, BioMamba demonstrates computational effectiveness by reducing model size and resource consumption compared to existing approaches. (3) Generality: with the capacity to effectively classify a diverse set of tasks, BioMamba demonstrates adaptability and effectiveness across various domains and applications. 
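The spectro-temporal embedding can be pictured with a short-time Fourier transform front end; this is our reading of the general strategy under stated assumptions, not the paper's exact pipeline. <pre><code>
import torch

def spectro_temporal_embedding(signal, n_fft=64, hop=16):
    """Map a raw biosignal (batch, time) to a token sequence
    (batch, frames, freq_bins) for a bidirectional sequence model."""
    window = torch.hann_window(n_fft)
    spec = torch.stft(signal, n_fft=n_fft, hop_length=hop,
                      window=window, return_complex=True)
    # Log-magnitude spectrogram, time-major so each frame is one token.
    return spec.abs().log1p().transpose(1, 2)
</code></pre>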
</p> </div> </dd> <dt> <a name='item230'>[230]</a> <a href ="/abs/2503.11964" title="Abstract" id="2503.11964"> arXiv:2503.11964 </a> (replaced) [<a href="/pdf/2503.11964" title="Download PDF" id="pdf-2503.11964" aria-labelledby="pdf-2503.11964">pdf</a>, <a href="https://arxiv.org/html/2503.11964v2" title="View HTML" id="html-2503.11964" aria-labelledby="html-2503.11964" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.11964" title="Other formats" id="oth-2503.11964" aria-labelledby="oth-2503.11964">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Entropy-regularized Gradient Estimators for Approximate Bayesian Inference </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kaur,+J">Jasmeet Kaur</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Machine Learning (stat.ML) </div> <p class='mathjax'> Effective uncertainty quantification is important for training modern predictive models with limited data, enhancing both accuracy and robustness. While Bayesian methods are effective for this purpose, they can be challenging to scale. When employing approximate Bayesian inference, ensuring the quality of samples from the posterior distribution in a computationally efficient manner is essential. This paper addresses the estimation of the Bayesian posterior to generate diverse samples by approximating the gradient flow of the Kullback-Leibler (KL) divergence and the cross entropy of the target approximation under the metric induced by the Stein Operator. It presents empirical evaluations on classification tasks to assess the method's performance and discusses its effectiveness for Model-Based Reinforcement Learning that uses uncertainty-aware network dynamics models. </p> </div> </dd> <dt> <a name='item231'>[231]</a> <a href ="/abs/2503.11965" title="Abstract" id="2503.11965"> arXiv:2503.11965 </a> (replaced) [<a href="/pdf/2503.11965" title="Download PDF" id="pdf-2503.11965" aria-labelledby="pdf-2503.11965">pdf</a>, <a href="https://arxiv.org/html/2503.11965v2" title="View HTML" id="html-2503.11965" aria-labelledby="html-2503.11965" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.11965" title="Other formats" id="oth-2503.11965" aria-labelledby="oth-2503.11965">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Revisiting Gradient Descent: A Dual-Weight Method for Improved Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+X">Xi Wang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> We introduce a novel framework for learning in neural networks by decomposing each neuron's weight vector into two distinct parts, $W_1$ and $W_2$, thereby modeling contrastive information directly at the neuron level. Traditional gradient descent stores both positive (target) and negative (non-target) feature information in a single weight vector, often obscuring fine-grained distinctions. Our approach, by contrast, maintains separate updates for target and non-target features, ultimately forming a single effective weight $W = W_1 - W_2$ that is more robust to noise and class imbalance. 
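A minimal sketch of this decomposition for a single linear layer (our own illustration of the stated $W = W_1 - W_2$ rule; the paper's exact update may differ): <pre><code>
import torch
import torch.nn as nn

class DualWeightLinear(nn.Module):
    """Stores target (W1) and non-target (W2) parts separately; the effective
    weight is W = W1 - W2, so inference cost matches a standard linear layer."""
    def __init__(self, d_in, d_out):
        super().__init__()
        self.W1 = nn.Parameter(torch.randn(d_out, d_in) * 0.01)  # target part
        self.W2 = nn.Parameter(torch.randn(d_out, d_in) * 0.01)  # non-target part
        self.bias = nn.Parameter(torch.zeros(d_out))

    def forward(self, x):
        return x @ (self.W1 - self.W2).T + self.bias
</code></pre>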
Experimental results on both regression (California Housing, Wine Quality) and classification (MNIST, Fashion-MNIST, CIFAR-10) tasks suggest that this decomposition enhances generalization and resists overfitting, especially when training data are sparse or noisy. Crucially, the inference complexity remains the same as in the standard $WX + \text{bias}$ setup, offering a practical solution for improved learning without additional inference-time overhead. </p> </div> </dd> <dt> <a name='item232'>[232]</a> <a href ="/abs/2503.12733" title="Abstract" id="2503.12733"> arXiv:2503.12733 </a> (replaced) [<a href="/pdf/2503.12733" title="Download PDF" id="pdf-2503.12733" aria-labelledby="pdf-2503.12733">pdf</a>, <a href="https://arxiv.org/html/2503.12733v2" title="View HTML" id="html-2503.12733" aria-labelledby="html-2503.12733" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.12733" title="Other formats" id="oth-2503.12733" aria-labelledby="oth-2503.12733">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Linearized Alternating Direction Multiplier Method for Federated Matrix Completion Problems </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hytla,+P">Patrick Hytla</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nghia,+T+T+A">Tran T. A. Nghia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Phan,+D+N">Duy Nhat Phan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rice,+A">Andrew Rice</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 29 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span> </div> <p class='mathjax'> Matrix completion is fundamental for predicting missing data with a wide range of applications in personalized healthcare, e-commerce, recommendation systems, and social network analysis. Traditional matrix completion approaches typically assume centralized data storage, which raises challenges in terms of computational efficiency, scalability, and user privacy. In this paper, we address the problem of federated matrix completion, focusing on scenarios where user-specific data is distributed across multiple clients and privacy constraints are strict. Federated learning provides a promising framework to address these challenges by enabling collaborative learning across distributed datasets without sharing raw data. We propose \texttt{FedMC-ADMM}, a novel algorithmic framework for solving federated matrix completion problems that combines the Alternating Direction Method of Multipliers with a randomized block-coordinate strategy and alternating proximal gradient steps. Unlike existing federated approaches, \texttt{FedMC-ADMM} effectively handles multi-block nonconvex and nonsmooth optimization problems, allowing efficient computation while preserving user privacy. We analyze the theoretical properties of our algorithm, demonstrating subsequential convergence and establishing a convergence rate of $\mathcal{O}(K^{-1/2})$, leading to a communication complexity of $\mathcal{O}(\epsilon^{-2})$ for reaching an $\epsilon$-stationary point. This work is the first to establish these theoretical guarantees for federated matrix completion in the presence of multi-block variables. 
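To make the algorithmic skeleton concrete, the following is a heavily simplified sketch of one communication round in the spirit of the description above; the function names, sampling rule, and plain gradient steps are our own simplifications of the ADMM-based updates, not the authors' pseudocode. <pre><code>
import numpy as np

def client_update(V, R, mask, U, step=0.01):
    """One alternating gradient step on a client's private ratings.
    R: local ratings block; mask: observed entries; U: local user factors."""
    grad_U = ((U @ V.T - R) * mask) @ V
    U = U - step * grad_U                     # private factor stays local
    grad_V = ((U @ V.T - R) * mask).T @ U     # update for the shared factor
    return U, grad_V

def server_round(V, clients, block_frac=0.3, step=0.01):
    # Randomized block-coordinate strategy: a random subset of clients
    # participates each round; raw ratings never leave the clients.
    grads = []
    for c in clients:
        if np.random.rand() < block_frac:
            c["U"], g = client_update(V, c["R"], c["mask"], c["U"], step)
            grads.append(g)
    return V - step * np.mean(grads, axis=0) if grads else V
</code></pre>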
To validate our approach, we conduct extensive experiments on real-world datasets, including MovieLens 1M, 10M, and Netflix. The results demonstrate that \texttt{FedMC-ADMM} outperforms existing methods in terms of convergence speed and testing accuracy. </p> </div> </dd> <dt> <a name='item233'>[233]</a> <a href ="/abs/2205.05749" title="Abstract" id="2205.05749"> arXiv:2205.05749 </a> (replaced) [<a href="/pdf/2205.05749" title="Download PDF" id="pdf-2205.05749" aria-labelledby="pdf-2205.05749">pdf</a>, <a href="https://arxiv.org/html/2205.05749v3" title="View HTML" id="html-2205.05749" aria-labelledby="html-2205.05749" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2205.05749" title="Other formats" id="oth-2205.05749" aria-labelledby="oth-2205.05749">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Are Metrics Enough? Guidelines for Communicating and Visualizing Predictive Models to Subject Matter Experts </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Suh,+A">Ashley Suh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Appleby,+G">Gabriel Appleby</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Anderson,+E+W">Erik W. Anderson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Finelli,+L">Luca Finelli</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+R">Remco Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cashman,+D">Dylan Cashman</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> IEEE TVCG 2023 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Presenting a predictive model's performance is a communication bottleneck that threatens collaborations between data scientists and subject matter experts (SMEs). Accuracy and error metrics alone fail to tell the whole story of a model - its risks, strengths, and limitations - making it difficult for subject matter experts to feel confident in their decision to use a model. As a result, models may fail in unexpected ways or go entirely unused, as subject matter experts disregard poorly presented models in favor of familiar, yet arguably substandard methods. In this paper, we describe an iterative study conducted with both subject matter experts and data scientists to understand the gaps in communication between these two groups. We find that, while the two groups share common goals of understanding the data and predictions of the model, friction can stem from unfamiliar terms, metrics, and visualizations - limiting the transfer of knowledge to SMEs and discouraging clarifying questions from being asked during presentations. Based on our findings, we derive a set of communication guidelines that use visualization as a common medium for communicating the strengths and weaknesses of a model. We provide a demonstration of our guidelines in a regression modeling scenario and elicit feedback on their use from subject matter experts. From our demonstration, subject matter experts were more comfortable discussing a model's performance, more aware of the trade-offs for the presented model, and better equipped to assess the model's risks - ultimately informing and contextualizing the model's use beyond text and numbers. 
</p> </div> </dd> <dt> <a name='item234'>[234]</a> <a href ="/abs/2208.06648" title="Abstract" id="2208.06648"> arXiv:2208.06648 </a> (replaced) [<a href="/pdf/2208.06648" title="Download PDF" id="pdf-2208.06648" aria-labelledby="pdf-2208.06648">pdf</a>, <a href="https://arxiv.org/html/2208.06648v4" title="View HTML" id="html-2208.06648" aria-labelledby="html-2208.06648" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2208.06648" title="Other formats" id="oth-2208.06648" aria-labelledby="oth-2208.06648">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Imputation Strategies Under Clinical Presence: Impact on Algorithmic Fairness </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Jeanselme,+V">Vincent Jeanselme</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=De-Arteaga,+M">Maria De-Arteaga</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Z">Zhe Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barrett,+J">Jessica Barrett</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tom,+B">Brian Tom</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Full journal version under review; presented at the Machine Learning for Health (ML4H) 2022 conference and published in the Proceedings of Machine Learning Research (193) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Machine learning risks reinforcing biases present in data and, as we argue in this work, in what is absent from data. In healthcare, societal and decision biases shape patterns in missing data, yet the algorithmic fairness implications of group-specific missingness are poorly understood. The way we address missingness in healthcare can have detrimental impacts on downstream algorithmic fairness. Our work questions current recommendations and practices aimed at handling missing data with a focus on their effect on algorithmic fairness, and offers a path forward. Specifically, we consider the theoretical underpinnings of existing recommendations as well as their empirical predictive performance and corresponding algorithmic fairness measured through subgroup performances. Our results show that current practices for handling missingness lack principled foundations, are disconnected from the realities of missingness mechanisms in healthcare, and can be counterproductive. For example, we show that favouring a group-specific imputation strategy can be misguided and exacerbate prediction disparities. We then build on our findings to propose a framework for empirically guiding imputation choices, and an accompanying reporting framework. Our work constitutes an important contribution to recent efforts by regulators and practitioners to grapple with the realities of real-world data, and to foster the responsible and transparent deployment of machine learning systems. We demonstrate the practical utility of the proposed framework through experimentation on widely used datasets, where we show how the proposed framework can guide the selection of imputation strategies, allowing us to choose among strategies that yield equal overall predictive performance but present different algorithmic fairness properties. 
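The proposed reporting idea can be turned into a small experiment template: fit the same model under different imputation strategies and compare subgroup performance. The dataset, model, and strategies below are generic placeholders, not the paper's protocol. <pre><code>
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

def subgroup_auc(X, y, group, test, strategy="pooled"):
    """Per-subgroup test AUC under pooled vs. group-specific mean imputation."""
    Xi = X.copy()
    if strategy == "pooled":
        Xi = SimpleImputer(strategy="mean").fit_transform(Xi)
    else:  # impute within each subgroup separately
        for g in np.unique(group):
            Xi[group == g] = SimpleImputer(strategy="mean").fit_transform(Xi[group == g])
    model = LogisticRegression(max_iter=1000).fit(Xi[~test], y[~test])
    s = model.predict_proba(Xi[test])[:, 1]
    return {g: roc_auc_score(y[test][group[test] == g], s[group[test] == g])
            for g in np.unique(group[test])}
</code></pre> Comparing the returned dictionaries across strategies surfaces exactly the situation the abstract describes: equal overall performance with different subgroup disparities.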
</p> </div> </dd> <dt> <a name='item235'>[235]</a> <a href ="/abs/2212.06492" title="Abstract" id="2212.06492"> arXiv:2212.06492 </a> (replaced) [<a href="/pdf/2212.06492" title="Download PDF" id="pdf-2212.06492" aria-labelledby="pdf-2212.06492">pdf</a>, <a href="https://arxiv.org/html/2212.06492v2" title="View HTML" id="html-2212.06492" aria-labelledby="html-2212.06492" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2212.06492" title="Other formats" id="oth-2212.06492" aria-labelledby="oth-2212.06492">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FNDaaS: Content-agnostic Detection of Fake News sites </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Papadopoulos,+P">Panagiotis Papadopoulos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Spithouris,+D">Dimitris Spithouris</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Markatos,+E+P">Evangelos P. Markatos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kourtellis,+N">Nicolas Kourtellis</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> 2023 IEEE International Conference on Big Data (BigData) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computers and Society (cs.CY)</span>; Cryptography and Security (cs.CR); Machine Learning (cs.LG) </div> <p class='mathjax'> Automatic fake news detection is a challenging problem in misinformation spreading, and it has tremendous real-world political and social impacts. Past studies have proposed machine learning-based methods for detecting such fake news, focusing on different properties of the published news articles, such as linguistic characteristics of the actual content, which, however, have limitations due to language barriers. Departing from such efforts, we propose Fake News Detection-as-a-Service (FNDaaS), the first automatic, content-agnostic fake news detection method that considers new and unstudied features such as network and structural characteristics per news website. The method can be deployed as a service, either at the ISP side for easier scalability and maintenance, or at the user side for better end-user privacy. We demonstrate the efficacy of our method using more than 340K datapoints crawled from existing lists of 637 fake and 1183 real news websites, and by building and testing a proof-of-concept system that materializes our proposal. Our analysis of data collected from these websites shows that the vast majority of fake news domains are very young and appear to keep an IP associated with their domain for shorter periods than real news sites. By conducting various experiments with machine learning classifiers, we demonstrate that FNDaaS can achieve an AUC score of up to 0.967 on past sites, and up to 77-92% accuracy on newly-flagged ones. 
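A schematic of the content-agnostic setup: a classifier trained on network and structural features rather than article text. The feature names and the loader are hypothetical placeholders, not the paper's feature set. <pre><code>
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

# Hypothetical per-website features, e.g.
# [domain_age_days, ip_association_days, dns_changes, cert_lifetime_days]
X, y = load_website_features()  # hypothetical loader for the crawled data

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, stratify=y)
clf = RandomForestClassifier(n_estimators=300).fit(X_tr, y_tr)
print("AUC:", roc_auc_score(y_te, clf.predict_proba(X_te)[:, 1]))
</code></pre>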
</p> </div> </dd> <dt> <a name='item236'>[236]</a> <a href ="/abs/2306.00636" title="Abstract" id="2306.00636"> arXiv:2306.00636 </a> (replaced) [<a href="/pdf/2306.00636" title="Download PDF" id="pdf-2306.00636" aria-labelledby="pdf-2306.00636">pdf</a>, <a href="https://arxiv.org/html/2306.00636v2" title="View HTML" id="html-2306.00636" aria-labelledby="html-2306.00636" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2306.00636" title="Other formats" id="oth-2306.00636" aria-labelledby="oth-2306.00636">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unfair Utilities and First Steps Towards Improving Them </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=J%C3%B8rgensen,+F+H">Frederik Hytting Jørgensen</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Weichwald,+S">Sebastian Weichwald</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Peters,+J">Jonas Peters</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Computers and Society (cs.CY); Machine Learning (cs.LG) </div> <p class='mathjax'> Many fairness criteria constrain the policy or choice of predictors, which can have unwanted consequences, in particular, when optimizing the policy under such constraints. Here, we advocate to instead focus on the utility function the policy is optimizing for. We define value of information fairness and propose to not use utility functions that violate this criterion. This principle suggests to modify these utility functions such that they satisfy value of information fairness. We describe how this can be done and discuss consequences for the corresponding optimal policies. We apply our framework to thought experiments and the COMPAS data. Focussing on the utility function provides better answers than existing fairness notions: We are not aware of any intuitively fair policy that is disallowed by value of information fairness, and when we find that value of information fairness recommends an intuitively unfair policy, no existing fairness notion finds an intuitively fair policy. 
</p> </div> </dd> <dt> <a name='item237'>[237]</a> <a href ="/abs/2308.02000" title="Abstract" id="2308.02000"> arXiv:2308.02000 </a> (replaced) [<a href="/pdf/2308.02000" title="Download PDF" id="pdf-2308.02000" aria-labelledby="pdf-2308.02000">pdf</a>, <a href="https://arxiv.org/html/2308.02000v2" title="View HTML" id="html-2308.02000" aria-labelledby="html-2308.02000" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2308.02000" title="Other formats" id="oth-2308.02000" aria-labelledby="oth-2308.02000">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Bridging Neural and Symbolic Representations with Transitional Dictionary Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+J">Junyan Cheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chin,+P">Peter Chin</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> ICLR 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> This paper introduces a novel Transitional Dictionary Learning (TDL) framework that can implicitly learn symbolic knowledge, such as visual parts and relations, by reconstructing the input as a combination of parts with implicit relations. We propose a game-theoretic diffusion model to decompose the input into visual parts using dictionaries learned by the Expectation Maximization (EM) algorithm, implemented as online prototype clustering based on the decomposition results. Additionally, two metrics, clustering information gain and heuristic shape score, are proposed to evaluate the model. Experiments are conducted on three abstract compositional visual object datasets, which require the model to utilize the compositionality of data instead of simply exploiting visual features. Then, three tasks on symbol grounding to predefined classes of parts and relations, as well as transfer learning to unseen classes, followed by a human evaluation, were carried out on these datasets. The results show that the proposed method discovers compositional patterns and significantly outperforms the state-of-the-art unsupervised part segmentation methods that rely on visual features from pre-trained backbones. Furthermore, the proposed metrics are consistent with human evaluations. 
</p> </div> </dd> <dt> <a name='item238'>[238]</a> <a href ="/abs/2308.03239" title="Abstract" id="2308.03239"> arXiv:2308.03239 </a> (replaced) [<a href="/pdf/2308.03239" title="Download PDF" id="pdf-2308.03239" aria-labelledby="pdf-2308.03239">pdf</a>, <a href="https://arxiv.org/html/2308.03239v2" title="View HTML" id="html-2308.03239" aria-labelledby="html-2308.03239" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2308.03239" title="Other formats" id="oth-2308.03239" aria-labelledby="oth-2308.03239">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unsynchronized Decentralized Q-Learning: Two Timescale Analysis By Persistence </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yongacoglu,+B">Bora Yongacoglu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Arslan,+G">Gürdal Arslan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Y%C3%BCksel,+S">Serdar Yüksel</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to SIAM Journal on Control and Optimization </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Science and Game Theory (cs.GT)</span>; Machine Learning (cs.LG); Multiagent Systems (cs.MA) </div> <p class='mathjax'> Non-stationarity is a fundamental challenge in multi-agent reinforcement learning (MARL), where agents update their behaviour as they learn. Many theoretical advances in MARL avoid the challenge of non-stationarity by coordinating the policy updates of agents in various ways, including synchronizing times at which agents are allowed to revise their policies. Synchronization enables analysis of many MARL algorithms via multi-timescale methods, but such synchronization is infeasible in many decentralized applications. In this paper, we study an unsynchronized variant of the decentralized Q-learning algorithm, a recent MARL algorithm for stochastic games. We provide sufficient conditions under which the unsynchronized algorithm drives play to equilibrium with high probability. Our solution utilizes constant learning rates in the Q-factor update, which we show to be critical for relaxing the synchronization assumptions of earlier work. Our analysis also applies to unsynchronized generalizations of a number of other algorithms from the regret testing tradition, whose performance is analyzed by multi-timescale methods that study Markov chains obtained via policy update dynamics. This work extends the applicability of the decentralized Q-learning algorithm and its relatives to settings in which parameters are selected in an independent manner, and tames non-stationarity without imposing the coordination assumptions of prior work. 
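The constant-learning-rate Q-factor update at the center of the analysis can be sketched as follows; exploration and the policy-revision machinery of the full algorithm are omitted. <pre><code>
import numpy as np

class DecentralizedQAgent:
    """One agent's Q-factor update with a constant step size, the ingredient
    shown to be critical for relaxing synchronization; each agent observes
    only its own action and reward, never the joint action."""
    def __init__(self, n_states, n_actions, alpha=0.05, gamma=0.95):
        self.Q = np.zeros((n_states, n_actions))
        self.alpha, self.gamma = alpha, gamma  # alpha is constant, not decayed

    def update(self, s, a, r, s_next):
        target = r + self.gamma * self.Q[s_next].max()
        self.Q[s, a] += self.alpha * (target - self.Q[s, a])
</code></pre>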
</p> </div> </dd> <dt> <a name='item239'>[239]</a> <a href ="/abs/2308.04585" title="Abstract" id="2308.04585"> arXiv:2308.04585 </a> (replaced) [<a href="/pdf/2308.04585" title="Download PDF" id="pdf-2308.04585" aria-labelledby="pdf-2308.04585">pdf</a>, <a href="https://arxiv.org/html/2308.04585v4" title="View HTML" id="html-2308.04585" aria-labelledby="html-2308.04585" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2308.04585" title="Other formats" id="oth-2308.04585" aria-labelledby="oth-2308.04585">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Kernel Single Proxy Control for Deterministic Confounding </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Xu,+L">Liyuan Xu</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Gretton,+A">Arthur Gretton</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> We consider the problem of causal effect estimation with an unobserved confounder, where we observe a single proxy variable that is associated with the confounder. Although it has been shown that the recovery of an average causal effect is impossible in general from a single proxy variable, we show that causal recovery is possible if the outcome is generated deterministically. This generalizes existing work on causal methods with a single proxy variable to the continuous treatment setting. We propose two kernel-based methods for this setting: the first based on the two-stage regression approach, and the second based on a maximum moment restriction approach. We prove that both approaches can consistently estimate the causal effect, and we empirically demonstrate that we can successfully recover the causal effect on challenging synthetic benchmarks. 
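The first of the two proposed methods builds on two-stage kernel ridge regression; the following is a generic sketch of that primitive (in the style of kernel instrumental-variable regression), not the paper's full proxy estimator. <pre><code>
import numpy as np

def rbf(A, B, s=1.0):
    # Gaussian kernel matrix between row sets A and B.
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-d2 / (2 * s ** 2))

def two_stage_krr(X1, Z1, Z2, Y2, X_query, lam1=1e-3, lam2=1e-3):
    """Stage 1: kernel-ridge-regress features of X on Z (sample 1).
    Stage 2: ridge-regress Y on the stage-1 predicted features (sample 2)."""
    n1, n2 = len(Z1), len(Z2)
    W = np.linalg.solve(rbf(Z1, Z1) + n1 * lam1 * np.eye(n1), rbf(Z1, Z2))
    M = rbf(X1, X1) @ W          # (n1, n2) predicted features at Z2
    beta = np.linalg.solve(M @ M.T + n2 * lam2 * np.eye(n1), M @ Y2)
    return rbf(X_query, X1) @ beta
</code></pre>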
</p> </div> </dd> <dt> <a name='item240'>[240]</a> <a href ="/abs/2308.11256" title="Abstract" id="2308.11256"> arXiv:2308.11256 </a> (replaced) [<a href="/pdf/2308.11256" title="Download PDF" id="pdf-2308.11256" aria-labelledby="pdf-2308.11256">pdf</a>, <a href="https://arxiv.org/html/2308.11256v2" title="View HTML" id="html-2308.11256" aria-labelledby="html-2308.11256" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2308.11256" title="Other formats" id="oth-2308.11256" aria-labelledby="oth-2308.11256">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Efficient Last-iterate Convergence Algorithms in Solving Games </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Meng,+L">Linjian Meng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Youzhi Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ge,+Z">Zhenxing Ge</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+S">Shangdong Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ding,+T">Tianyu Ding</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+W">Wenbin Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+T">Tianpei Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=An,+B">Bo An</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+Y">Yang Gao</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Science and Game Theory (cs.GT)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> To establish last-iterate convergence for Counterfactual Regret Minimization (CFR) algorithms in learning a Nash equilibrium (NE) of extensive-form games (EFGs), recent studies reformulate learning an NE of the original EFG as learning the NEs of a sequence of (perturbed) regularized EFGs. Consequently, proving last-iterate convergence in solving the original EFG reduces to proving last-iterate convergence in solving (perturbed) regularized EFGs. However, the empirical convergence rates of the algorithms in these studies are suboptimal, since they do not utilize Regret Matching (RM)-based CFR algorithms to solve perturbed EFGs, which are known for their exceptionally fast empirical convergence rates. Additionally, since solving multiple perturbed regularized EFGs is required, fine-tuning across all such games is infeasible, making parameter-free algorithms highly desirable. In this paper, we prove that CFR$^+$, a classical parameter-free RM-based CFR algorithm, achieves last-iterate convergence in learning an NE of perturbed regularized EFGs. Leveraging CFR$^+$ to solve perturbed regularized EFGs, we obtain Reward Transformation CFR$^+$ (RTCFR$^+$). Importantly, we extend prior work on the parameter-free property of CFR$^+$, enhancing its stability, which is crucial for the empirical convergence of RTCFR$^+$. Experiments show that RTCFR$^+$ significantly outperforms existing algorithms with theoretical last-iterate convergence guarantees. 
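CFR$^+$'s core primitive is the regret-matching$^+$ update, whose parameter-free character the paper builds on; a minimal sketch for a single decision point: <pre><code>
import numpy as np

class RegretMatchingPlus:
    """Cumulative regrets are clipped at zero after every update, which is
    what distinguishes regret matching+ (and hence CFR+) from vanilla CFR."""
    def __init__(self, n_actions):
        self.regrets = np.zeros(n_actions)

    def strategy(self):
        pos = np.maximum(self.regrets, 0.0)
        total = pos.sum()
        return pos / total if total > 0 else np.full(len(pos), 1.0 / len(pos))

    def observe(self, action_utils):
        # Instantaneous regret of each action vs. the current strategy.
        u = action_utils @ self.strategy()
        self.regrets = np.maximum(self.regrets + (action_utils - u), 0.0)
</code></pre>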
</p> </div> </dd> <dt> <a name='item241'>[241]</a> <a href ="/abs/2309.11647" title="Abstract" id="2309.11647"> arXiv:2309.11647 </a> (replaced) [<a href="/pdf/2309.11647" title="Download PDF" id="pdf-2309.11647" aria-labelledby="pdf-2309.11647">pdf</a>, <a href="https://arxiv.org/html/2309.11647v4" title="View HTML" id="html-2309.11647" aria-labelledby="html-2309.11647" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2309.11647" title="Other formats" id="oth-2309.11647" aria-labelledby="oth-2309.11647">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Potential and limitations of random Fourier features for dequantizing quantum machine learning </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Sweke,+R">Ryan Sweke</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Recio-Armengol,+E">Erik Recio-Armengol</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Jerbi,+S">Sofiene Jerbi</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Gil-Fuster,+E">Elies Gil-Fuster</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Fuller,+B">Bryce Fuller</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Eisert,+J">Jens Eisert</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Meyer,+J+J">Johannes Jakob Meyer</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 44 pages (33+11). 6 Figures, with many clarifying figures added to this version from original version. Comments and feedback welcome. Now accepted in Quantum - this is the final version </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Quantum 9, 1640 (2025) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Quantum machine learning is arguably one of the most explored applications of near-term quantum devices. Much focus has been put on notions of variational quantum machine learning where parameterized quantum circuits (PQCs) are used as learning models. These PQC models have a rich structure which suggests that they might be amenable to efficient dequantization via random Fourier features (RFF). In this work, we establish necessary and sufficient conditions under which RFF does indeed provide an efficient dequantization of variational quantum machine learning for regression. We build on these insights to make concrete suggestions for PQC architecture design, and to identify structures which are necessary for a regression problem to admit a potential quantum advantage via PQC based optimization. 
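Random Fourier features, the dequantization tool studied here, admit a compact classical implementation; this generic Gaussian-kernel version illustrates the primitive, whereas the paper's analysis concerns frequencies matched to PQC spectra. <pre><code>
import numpy as np

def random_fourier_features(X, n_features=200, sigma=1.0, seed=0):
    """Map X so that z(x) . z(y) approximates exp(-||x - y||^2 / (2 sigma^2))
    (Rahimi & Recht); a linear model on z then mimics kernel regression."""
    rng = np.random.default_rng(seed)
    d = X.shape[1]
    W = rng.normal(scale=1.0 / sigma, size=(d, n_features))
    b = rng.uniform(0.0, 2.0 * np.pi, size=n_features)
    return np.sqrt(2.0 / n_features) * np.cos(X @ W + b)
</code></pre>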
</p> </div> </dd> <dt> <a name='item242'>[242]</a> <a href ="/abs/2312.11356" title="Abstract" id="2312.11356"> arXiv:2312.11356 </a> (replaced) [<a href="/pdf/2312.11356" title="Download PDF" id="pdf-2312.11356" aria-labelledby="pdf-2312.11356">pdf</a>, <a href="https://arxiv.org/html/2312.11356v2" title="View HTML" id="html-2312.11356" aria-labelledby="html-2312.11356" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2312.11356" title="Other formats" id="oth-2312.11356" aria-labelledby="oth-2312.11356">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> The Problem of Coherence in Natural Language Explanations of Recommendations </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Raczy%C5%84ski,+J">Jakub Raczyński</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lango,+M">Mateusz Lango</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stefanowski,+J">Jerzy Stefanowski</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> ECAI 2023 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Information Retrieval (cs.IR); Machine Learning (cs.LG) </div> <p class='mathjax'> Providing natural language explanations for recommendations is particularly useful from the perspective of a non-expert user. Although several methods for providing such explanations have recently been proposed, we argue that an important aspect of explanation quality has been overlooked in their experimental evaluation. Specifically, the coherence between generated text and predicted rating, which is a necessary condition for an explanation to be useful, is not properly captured by currently used evaluation measures. In this paper, we highlight the issue of explanation and prediction coherence by 1) presenting results from a manual verification of explanations generated by one of the state-of-the-art approaches 2) proposing a method of automatic coherence evaluation 3) introducing a new transformer-based method that aims to produce more coherent explanations than the state-of-the-art approaches 4) performing an experimental evaluation which demonstrates that this method significantly improves the explanation coherence without affecting the other aspects of recommendation performance. 
</p> </div> </dd> <dt> <a name='item243'>[243]</a> <a href ="/abs/2312.15686" title="Abstract" id="2312.15686"> arXiv:2312.15686 </a> (replaced) [<a href="/pdf/2312.15686" title="Download PDF" id="pdf-2312.15686" aria-labelledby="pdf-2312.15686">pdf</a>, <a href="https://arxiv.org/html/2312.15686v2" title="View HTML" id="html-2312.15686" aria-labelledby="html-2312.15686" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2312.15686" title="Other formats" id="oth-2312.15686" aria-labelledby="oth-2312.15686">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PULASki: Learning inter-rater variability using statistical distances to improve probabilistic segmentation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Chatterjee,+S">Soumick Chatterjee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gaidzik,+F">Franziska Gaidzik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sciarra,+A">Alessandro Sciarra</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mattern,+H">Hendrik Mattern</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Janiga,+G">Gábor Janiga</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Speck,+O">Oliver Speck</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=N%C3%BCrnberger,+A">Andreas Nürnberger</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pathiraja,+S">Sahani Pathiraja</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Human-Computer Interaction (cs.HC); Machine Learning (cs.LG) </div> <p class='mathjax'> In the domain of medical imaging, many supervised learning based methods for segmentation face several challenges such as high variability in annotations from multiple experts, paucity of labelled data and class imbalanced datasets. These issues may result in segmentations that lack the requisite precision for clinical analysis and can be misleadingly overconfident without associated uncertainty quantification. This work proposes the PULASki method as a computationally efficient generative tool for biomedical image segmentation that accurately captures variability in expert annotations, even in small datasets. This approach makes use of an improved loss function based on statistical distances in a conditional variational autoencoder structure (Probabilistic UNet), which improves learning of the conditional decoder compared to the standard cross-entropy, particularly in class-imbalanced problems. We analyse the proposed method on two structurally different segmentation tasks (intracranial vessel and multiple sclerosis (MS) lesion segmentation) and compare our results to four well-established baselines in terms of quantitative metrics and qualitative output. These experiments involve class-imbalanced datasets characterised by challenging features, including suboptimal signal-to-noise ratios and high ambiguity. Empirical results demonstrate that the PULASki method outperforms all baselines at the 5\% significance level. Our experiments are also among the first to present a comparative study of computationally feasible segmentation of complex geometries using 3D patches versus the traditional use of 2D slices. 
The generated segmentations are shown to be much more anatomically plausible than in the 2D case, particularly for the vessel task. </p> </div> </dd> <dt> <a name='item244'>[244]</a> <a href ="/abs/2403.12029" title="Abstract" id="2403.12029"> arXiv:2403.12029 </a> (replaced) [<a href="/pdf/2403.12029" title="Download PDF" id="pdf-2403.12029" aria-labelledby="pdf-2403.12029">pdf</a>, <a href="https://arxiv.org/html/2403.12029v3" title="View HTML" id="html-2403.12029" aria-labelledby="html-2403.12029" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.12029" title="Other formats" id="oth-2403.12029" aria-labelledby="oth-2403.12029">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Align and Distill: Unifying and Improving Domain Adaptive Object Detection </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Kay,+J">Justin Kay</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Haucke,+T">Timm Haucke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stathatos,+S">Suzanne Stathatos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+S">Siqi Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Young,+E">Erik Young</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Perona,+P">Pietro Perona</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Beery,+S">Sara Beery</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Van+Horn,+G">Grant Van Horn</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> TMLR camera ready (Featured Certification). 33 pages, 15 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Object detectors often perform poorly on data that differs from their training set. Domain adaptive object detection (DAOD) methods have recently demonstrated strong results on addressing this challenge. Unfortunately, we identify systemic benchmarking pitfalls that call past results into question and hamper further progress: (a) Overestimation of performance due to underpowered baselines, (b) Inconsistent implementation practices preventing transparent comparisons of methods, and (c) Lack of generality due to outdated backbones and lack of diversity in benchmarks. We address these problems by introducing: (1) A unified benchmarking and implementation framework, Align and Distill (ALDI), enabling comparison of DAOD methods and supporting future development, (2) A fair and modern training and evaluation protocol for DAOD that addresses benchmarking pitfalls, (3) A new DAOD benchmark dataset, CFC-DAOD, enabling evaluation on diverse real-world data, and (4) A new method, ALDI++, that achieves state-of-the-art results by a large margin. ALDI++ outperforms the previous state-of-the-art by +3.5 AP50 on Cityscapes to Foggy Cityscapes, +5.7 AP50 on Sim10k to Cityscapes (where ours is the only method to outperform a fair baseline), and +0.6 AP50 on CFC Kenai to Channel. ALDI and ALDI++ are architecture-agnostic, setting a new state-of-the-art for YOLO and DETR-based DAOD as well without additional hyperparameter tuning. 
Our framework, dataset, and state-of-the-art method offer a critical reset for DAOD and provide a strong foundation for future research. Code and data are available: <a href="https://github.com/justinkay/aldi" rel="external noopener nofollow" class="link-external link-https">this https URL</a> and <a href="https://github.com/visipedia/caltech-fish-counting" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item245'>[245]</a> <a href ="/abs/2403.13501" title="Abstract" id="2403.13501"> arXiv:2403.13501 </a> (replaced) [<a href="/pdf/2403.13501" title="Download PDF" id="pdf-2403.13501" aria-labelledby="pdf-2403.13501">pdf</a>, <a href="/format/2403.13501" title="Other formats" id="oth-2403.13501" aria-labelledby="oth-2403.13501">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> VSTAR: Generative Temporal Nursing for Longer Dynamic Video Synthesis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yumeng Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Beluch,+W">William Beluch</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Keuper,+M">Margret Keuper</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+D">Dan Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Khoreva,+A">Anna Khoreva</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at ICLR 2025. Code: <a href="https://github.com/boschresearch/VSTAR" rel="external noopener nofollow" class="link-external link-https">this https URL</a> and project page: <a href="https://yumengli007.github.io/VSTAR" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Multimedia (cs.MM) </div> <p class='mathjax'> Despite tremendous progress in the field of text-to-video (T2V) synthesis, open-sourced T2V diffusion models struggle to generate longer videos with dynamically varying and evolving content. They tend to synthesize quasi-static videos, ignoring the necessary visual change-over-time implied in the text prompt. At the same time, scaling these models to enable longer, more dynamic video synthesis often remains computationally intractable. To address this challenge, we introduce the concept of Generative Temporal Nursing (GTN), where we aim to alter the generative process on the fly during inference to improve control over the temporal dynamics and enable generation of longer videos. We propose a method for GTN, dubbed VSTAR, which consists of two key ingredients: 1) Video Synopsis Prompting (VSP) - automatic generation of a video synopsis based on the original single prompt leveraging LLMs, which gives accurate textual guidance to different visual states of longer videos, and 2) Temporal Attention Regularization (TAR) - a regularization technique to refine the temporal attention units of the pre-trained T2V diffusion models, which enables control over the video dynamics. We experimentally showcase the superiority of the proposed approach in generating longer, visually appealing videos over existing open-sourced T2V models. 
We additionally analyze the temporal attention maps realized with and without VSTAR, demonstrating the importance of applying our method to mitigate neglect of the desired visual change over time. </p> </div> </dd> <dt> <a name='item246'>[246]</a> <a href ="/abs/2405.13637" title="Abstract" id="2405.13637"> arXiv:2405.13637 </a> (replaced) [<a href="/pdf/2405.13637" title="Download PDF" id="pdf-2405.13637" aria-labelledby="pdf-2405.13637">pdf</a>, <a href="https://arxiv.org/html/2405.13637v4" title="View HTML" id="html-2405.13637" aria-labelledby="html-2405.13637" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2405.13637" title="Other formats" id="oth-2405.13637" aria-labelledby="oth-2405.13637">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Curriculum Direct Preference Optimization for Diffusion and Consistency Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Croitoru,+F">Florinel-Alin Croitoru</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hondru,+V">Vlad Hondru</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ionescu,+R+T">Radu Tudor Ionescu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sebe,+N">Nicu Sebe</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shah,+M">Mubarak Shah</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at CVPR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Direct Preference Optimization (DPO) has been proposed as an effective and efficient alternative to reinforcement learning from human feedback (RLHF). In this paper, we propose a novel and enhanced version of DPO based on curriculum learning for text-to-image generation. Our method is divided into two training stages. First, a ranking of the examples generated for each prompt is obtained by employing a reward model. Then, increasingly difficult pairs of examples are sampled and provided to a text-to-image generative (diffusion or consistency) model. Generated samples that are far apart in the ranking are considered to form easy pairs, while those that are close in the ranking form hard pairs. In other words, we use the rank difference between samples as a measure of difficulty. The sampled pairs are split into batches according to their difficulty levels, which are gradually used to train the generative model. Our approach, Curriculum DPO, is compared against state-of-the-art fine-tuning approaches on nine benchmarks, outperforming the competing methods in terms of text alignment, aesthetics and human preference. Our code is available at <a href="https://github.com/CroitoruAlin/Curriculum-DPO" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
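The rank-difference curriculum can be sketched as a pair-sampling routine; the level boundaries below are a simplified illustration of the described easy-to-hard schedule, not the authors' exact batching code. <pre><code>
import itertools

def curriculum_pairs(ranked, n_levels=3):
    """ranked: generations for one prompt, best-to-worst by the reward model.
    Yields (level, [(preferred, rejected), ...]) from easy (large rank gap,
    hence clearly distinguishable pairs) to hard (small rank gap)."""
    pairs = [(ranked[i], ranked[j], j - i)
             for i, j in itertools.combinations(range(len(ranked)), 2)]
    max_gap = len(ranked) - 1
    for level in range(n_levels):  # easy -> hard
        lo = max_gap * (n_levels - level - 1) // n_levels
        hi = max_gap * (n_levels - level) // n_levels
        batch = [(a, b) for a, b, gap in pairs if lo < gap <= hi]
        if batch:
            yield level, batch
</code></pre>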
</p> </div> </dd> <dt> <a name='item247'>[247]</a> <a href ="/abs/2406.04746" title="Abstract" id="2406.04746"> arXiv:2406.04746 </a> (replaced) [<a href="/pdf/2406.04746" title="Download PDF" id="pdf-2406.04746" aria-labelledby="pdf-2406.04746">pdf</a>, <a href="https://arxiv.org/html/2406.04746v2" title="View HTML" id="html-2406.04746" aria-labelledby="html-2406.04746" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.04746" title="Other formats" id="oth-2406.04746" aria-labelledby="oth-2406.04746">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> PQPP: A Joint Benchmark for Text-to-Image Prompt and Query Performance Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Poesina,+E">Eduard Poesina</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Costache,+A+V">Adriana Valentina Costache</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chifu,+A">Adrian-Gabriel Chifu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mothe,+J">Josiane Mothe</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ionescu,+R+T">Radu Tudor Ionescu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted at CVPR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Machine Learning (cs.LG) </div> <p class='mathjax'> Text-to-image generation has recently emerged as a viable alternative to text-to-image retrieval, driven by the visually impressive results of generative diffusion models. Although query performance prediction is an active research topic in information retrieval, to the best of our knowledge, there is no prior study that analyzes the difficulty of queries (referred to as prompts) in text-to-image generation, based on human judgments. To this end, we introduce the first dataset of prompts which are manually annotated in terms of image generation performance. Additionally, we extend these evaluations to text-to-image retrieval by collecting manual annotations that represent retrieval performance. We thus establish the first joint benchmark for prompt and query performance prediction (PQPP) across both tasks, comprising over 10K queries. Our benchmark enables (i) the comparative assessment of prompt/query difficulty in both image generation and image retrieval, and (ii) the evaluation of prompt/query performance predictors addressing both generation and retrieval. We evaluate several pre- and post-generation/retrieval performance predictors, thus providing competitive baselines for future research. Our benchmark and code are publicly available at <a href="https://github.com/Eduard6421/PQPP" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item248'>[248]</a> <a href ="/abs/2406.05797" title="Abstract" id="2406.05797"> arXiv:2406.05797 </a> (replaced) [<a href="/pdf/2406.05797" title="Download PDF" id="pdf-2406.05797" aria-labelledby="pdf-2406.05797">pdf</a>, <a href="https://arxiv.org/html/2406.05797v2" title="View HTML" id="html-2406.05797" aria-labelledby="html-2406.05797" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.05797" title="Other formats" id="oth-2406.05797" aria-labelledby="oth-2406.05797">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> 3D-MolT5: Leveraging Discrete Structural Information for Molecule-Text Modeling </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Pei,+Q">Qizhi Pei</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Yan,+R">Rui Yan</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Gao,+K">Kaiyuan Gao</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zhu,+J">Jinhua Zhu</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Wu,+L">Lijun Wu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by ICLR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Biomolecules (q-bio.BM)</span>; Artificial Intelligence (cs.AI); Computational Engineering, Finance, and Science (cs.CE); Computation and Language (cs.CL); Machine Learning (cs.LG) </div> <p class='mathjax'> The integration of molecular and natural language representations has emerged as a focal point in molecular science, with recent advancements in Language Models (LMs) demonstrating significant potential for comprehensive modeling of both domains. However, existing approaches face notable limitations, particularly in their neglect of three-dimensional (3D) information, which is crucial for understanding molecular structures and functions. While some efforts have been made to incorporate 3D molecular information into LMs using external structure encoding modules, significant difficulties remain, such as insufficient interaction across modalities in pre-training and challenges in modality alignment. To address the limitations, we propose \textbf{3D-MolT5}, a unified framework designed to model molecules in both sequence and 3D structure spaces. The key innovation of our approach lies in mapping fine-grained 3D substructure representations into a specialized 3D token vocabulary. This methodology facilitates the seamless integration of sequence and structure representations in a tokenized format, enabling 3D-MolT5 to encode molecular sequences, molecular structures, and text sequences within a unified architecture. Leveraging this tokenized input strategy, we build a foundation model that unifies the sequence and structure data formats. We then conduct joint pre-training with multi-task objectives to enhance the model's comprehension of these diverse modalities within a shared representation space. Thus, our approach significantly improves cross-modal interaction and alignment, addressing key challenges in previous work. Further instruction tuning demonstrates that our 3D-MolT5 has strong generalization ability and surpasses existing methods with superior performance in multiple downstream tasks. 
Our code is available at <a href="https://github.com/QizhiPei/3D-MolT5" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item249'>[249]</a> <a href ="/abs/2406.08401" title="Abstract" id="2406.08401"> arXiv:2406.08401 </a> (replaced) [<a href="/pdf/2406.08401" title="Download PDF" id="pdf-2406.08401" aria-labelledby="pdf-2406.08401">pdf</a>, <a href="https://arxiv.org/html/2406.08401v4" title="View HTML" id="html-2406.08401" aria-labelledby="html-2406.08401" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.08401" title="Other formats" id="oth-2406.08401" aria-labelledby="oth-2406.08401">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Nyström Kernel Stein Discrepancy </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Kalinke,+F">Florian Kalinke</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Szabo,+Z">Zoltan Szabo</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=K.,+B">Bharath K. Sriperumbudur</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Add limitations; accepted for publication at AISTATS 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG); Statistics Theory (math.ST) </div> <p class='mathjax'> Kernel methods underpin many of the most successful approaches in data science and statistics, and they allow representing probability measures as elements of a reproducing kernel Hilbert space without loss of information. Recently, the kernel Stein discrepancy (KSD), which combines Stein's method with the flexibility of kernel techniques, gained considerable attention. Through the Stein operator, KSD allows the construction of powerful goodness-of-fit tests where it is sufficient to know the target distribution up to a multiplicative constant. However, the typical U- and V-statistic-based KSD estimators suffer from a quadratic runtime complexity, which hinders their application in large-scale settings. In this work, we propose a Nyström-based KSD acceleration -- with runtime $\mathcal O\left(mn+m^3\right)$ for $n$ samples and $m\ll n$ Nyström points --, show its $\sqrt{n}$-consistency with a classical sub-Gaussian assumption, and demonstrate its applicability for goodness-of-fit testing on a suite of benchmarks. We also show the $\sqrt n$-consistency of the quadratic-time KSD estimator. </p> </div> </dd> <dt> <a name='item250'>[250]</a> <a href ="/abs/2406.13839" title="Abstract" id="2406.13839"> arXiv:2406.13839 </a> (replaced) [<a href="/pdf/2406.13839" title="Download PDF" id="pdf-2406.13839" aria-labelledby="pdf-2406.13839">pdf</a>, <a href="https://arxiv.org/html/2406.13839v3" title="View HTML" id="html-2406.13839" aria-labelledby="html-2406.13839" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2406.13839" title="Other formats" id="oth-2406.13839" aria-labelledby="oth-2406.13839">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> RNA-FrameFlow: Flow Matching for de novo 3D RNA Backbone Design </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Anand,+R">Rishabh Anand</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Joshi,+C+K">Chaitanya K. 
Joshi</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Morehead,+A">Alex Morehead</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Jamasb,+A+R">Arian R. Jamasb</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Harris,+C">Charles Harris</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Mathis,+S+V">Simon V. Mathis</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Didi,+K">Kieran Didi</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Ying,+R">Rex Ying</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Hooi,+B">Bryan Hooi</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Li%C3%B2,+P">Pietro Liò</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Oral presentation at Machine Learning in Computational Biology (MLCB), 2024. Also presented as an Oral at ICML 2024 Structured Probabilistic Inference & Generative Modeling Workshop, and a Spotlight at ICML 2024 AI4Science Workshop </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Biomolecules (q-bio.BM)</span>; Machine Learning (cs.LG); Genomics (q-bio.GN) </div> <p class='mathjax'> We introduce RNA-FrameFlow, the first generative model for 3D RNA backbone design. We build upon SE(3) flow matching for protein backbone generation and establish protocols for data preparation and evaluation to address unique challenges posed by RNA modeling. We formulate RNA structures as a set of rigid-body frames and associated loss functions which account for larger, more conformationally flexible RNA backbones (13 atoms per nucleotide) vs. proteins (4 atoms per residue). Toward tackling the lack of diversity in 3D RNA datasets, we explore training with structural clustering and cropping augmentations. Additionally, we define a suite of evaluation metrics to measure whether the generated RNA structures are globally self-consistent (via inverse folding followed by forward folding) and locally recover RNA-specific structural descriptors. The most performant version of RNA-FrameFlow generates locally realistic RNA backbones of 40-150 nucleotides, over 40% of which pass our validity criteria as measured by a self-consistency TM-score >= 0.45, at which two RNAs have the same global fold. 
Open-source code: <a href="https://github.com/rish-16/rna-backbone-design" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </p> </div> </dd> <dt> <a name='item251'>[251]</a> <a href ="/abs/2407.16970" title="Abstract" id="2407.16970"> arXiv:2407.16970 </a> (replaced) [<a href="/pdf/2407.16970" title="Download PDF" id="pdf-2407.16970" aria-labelledby="pdf-2407.16970">pdf</a>, <a href="https://arxiv.org/html/2407.16970v3" title="View HTML" id="html-2407.16970" aria-labelledby="html-2407.16970" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2407.16970" title="Other formats" id="oth-2407.16970" aria-labelledby="oth-2407.16970">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Towards Aligning Language Models with Textual Feedback </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Lloret,+S+A">Saüc Abadal Lloret</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dhuliawala,+S">Shehzaad Dhuliawala</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Murugesan,+K">Keerthiram Murugesan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sachan,+M">Mrinmaya Sachan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to EMNLP 2024 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> We present ALT (ALignment with Textual feedback), an approach that aligns language models with user preferences expressed in text. We argue that text offers greater expressiveness, enabling users to provide richer feedback than simple comparative preferences, and that this richer feedback can lead to more efficient and effective alignment. ALT aligns the model by conditioning its generation on the textual feedback. Our method relies solely on language modeling techniques and requires minimal hyper-parameter tuning, though it still presents the main benefits of RL-based alignment algorithms and can effectively learn from textual feedback. We explore the efficacy and efficiency of textual feedback across different tasks such as toxicity reduction, summarization, and dialog response generation. We find that ALT outperforms PPO for the task of toxicity reduction while being able to match its performance on summarization with only 20% of the samples. We also explore how ALT can be used with feedback provided by an existing LLM, considering both constrained and unconstrained textual feedback. We also outline future directions to align models with natural language feedback. 
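</p> <p class='mathjax'> To make the conditioning idea above concrete, here is a rough, unofficial Python sketch of feedback-conditioned generation with a generic causal LM; the model name ("gpt2") and the feedback/prompt format are illustrative assumptions, not ALT's actual recipe: </p> <pre>
# Sketch of conditioning generation on textual feedback (illustrative;
# ALT's exact data format and training procedure are in the paper).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # hypothetical stand-in; the approach is model-agnostic
tok = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

feedback = "Feedback: the response should be non-toxic and concise."
prompt = "User: please summarize the article."
# Conditioning = prepend the textual feedback to the prompt.
inputs = tok(feedback + "\n" + prompt, return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=50, do_sample=False)
print(tok.decode(out[0], skip_special_tokens=True))
</pre> <p class='mathjax'>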
</p> </div> </dd> <dt> <a name='item252'>[252]</a> <a href ="/abs/2408.05819" title="Abstract" id="2408.05819"> arXiv:2408.05819 </a> (replaced) [<a href="/pdf/2408.05819" title="Download PDF" id="pdf-2408.05819" aria-labelledby="pdf-2408.05819">pdf</a>, <a href="https://arxiv.org/html/2408.05819v2" title="View HTML" id="html-2408.05819" aria-labelledby="html-2408.05819" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2408.05819" title="Other formats" id="oth-2408.05819" aria-labelledby="oth-2408.05819">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> On the Convergence of a Federated Expectation-Maximization Algorithm </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Tao,+Z">Zhixu Tao</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Chandak,+R">Rajita Chandak</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Kulkarni,+S">Sanjeev Kulkarni</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Data heterogeneity has been a long-standing bottleneck in studying the convergence rates of Federated Learning algorithms. In order to better understand the issue of data heterogeneity, we study the convergence rate of the Expectation-Maximization (EM) algorithm for the Federated Mixture of $K$ Linear Regressions model (FMLR). We completely characterize the convergence rate of the EM algorithm under all regimes of $m/n$ where $m$ is the number of clients and $n$ is the number of data points per client. We show that with a signal-to-noise-ratio (SNR) of order $\Omega(\sqrt{K})$, the well-initialized EM algorithm converges within the minimax distance of the ground truth under all regimes. Interestingly, we identify that when the number of clients grows reasonably with respect to the number of data points per client, the EM algorithm only requires a constant number of iterations to converge. We perform experiments on synthetic data to illustrate our results. In line with our theoretical findings, the simulations show that rather than being a bottleneck, data heterogeneity can accelerate the convergence of iterative federated algorithms. 
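</p> <p class='mathjax'> For readers unfamiliar with the underlying iteration, the following is a minimal, single-machine Python sketch of EM for a mixture of $K$ linear regressions; the federated variant analyzed in the paper aggregates such updates across clients, and all names and constants here are illustrative: </p> <pre>
# Illustrative EM for a mixture of K linear regressions (single machine).
import numpy as np

def em_mlr(X, y, K, iters=50, sigma=1.0, seed=0):
    rng = np.random.default_rng(seed)
    d = X.shape[1]
    betas = rng.normal(size=(K, d))          # component coefficients
    for _ in range(iters):
        # E-step: responsibilities from Gaussian residual likelihoods
        resid = y[:, None] - X @ betas.T     # shape (n, K)
        logw = -0.5 * (resid / sigma) ** 2
        w = np.exp(logw - logw.max(axis=1, keepdims=True))
        w /= w.sum(axis=1, keepdims=True)
        # M-step: weighted least squares per component
        for k in range(K):
            Wk = w[:, k]
            A = X.T @ (Wk[:, None] * X)
            b = X.T @ (Wk * y)
            betas[k] = np.linalg.solve(A + 1e-8 * np.eye(d), b)
    return betas
</pre> <p class='mathjax'>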
</p> </div> </dd> <dt> <a name='item253'>[253]</a> <a href ="/abs/2409.07510" title="Abstract" id="2409.07510"> arXiv:2409.07510 </a> (replaced) [<a href="/pdf/2409.07510" title="Download PDF" id="pdf-2409.07510" aria-labelledby="pdf-2409.07510">pdf</a>, <a href="https://arxiv.org/html/2409.07510v5" title="View HTML" id="html-2409.07510" aria-labelledby="html-2409.07510" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.07510" title="Other formats" id="oth-2409.07510" aria-labelledby="oth-2409.07510">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Still More Shades of Null: An Evaluation Suite for Responsible Missing Value Imputation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Khan,+F+A">Falaah Arif Khan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Herasymuk,+D">Denys Herasymuk</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Protsiv,+N">Nazar Protsiv</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stoyanovich,+J">Julia Stoyanovich</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Computers and Society (cs.CY); Machine Learning (cs.LG) </div> <p class='mathjax'> Data missingness is a practical challenge of sustained interest to the scientific community. In this paper, we present Shades-of-Null, an evaluation suite for responsible missing value imputation. Our work is novel in two ways: (i) we model realistic and socially-salient missingness scenarios that go beyond Rubin's classic Missing Completely at Random (MCAR), Missing At Random (MAR) and Missing Not At Random (MNAR) settings, to include multi-mechanism missingness (when different missingness patterns co-exist in the data) and missingness shift (when the missingness mechanism changes between training and test); and (ii) we evaluate imputers holistically, based on imputation quality and imputation fairness, as well as on the predictive performance, fairness and stability of the models that are trained and tested on the data post-imputation. <br>We use Shades-of-Null to conduct a large-scale empirical study involving 29,736 experimental pipelines, and find that while there is no single best-performing imputation approach for all missingness types, interesting trade-offs arise between predictive performance, fairness and stability, based on the combination of missingness scenario, imputer choice, and the architecture of the predictive model. We make Shades-of-Null publicly available, to enable researchers to rigorously evaluate missing value imputation methods on a wide range of metrics in plausible and socially meaningful scenarios. 
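</p> <p class='mathjax'> As a small illustration of two of Rubin's mechanisms mentioned above (not the evaluation suite itself), here is a Python sketch that injects MCAR and MAR missingness into a synthetic feature matrix; the probabilities and the logistic MAR dependence are assumptions: </p> <pre>
# Sketch of MCAR vs. MAR missingness injection (illustrative values).
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 5))

# MCAR: entries of column 0 are dropped independently with prob 0.2
mcar_mask = rng.random(X.shape[0]) < 0.2

# MAR: missingness in column 0 depends on the *observed* column 1
p_mar = 1 / (1 + np.exp(-X[:, 1]))   # larger col-1 values -> more missing
mar_mask = rng.random(X.shape[0]) < 0.3 * p_mar

X_mcar = X.copy(); X_mcar[mcar_mask, 0] = np.nan
X_mar = X.copy();  X_mar[mar_mask, 0] = np.nan
</pre> <p class='mathjax'>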
</p> </div> </dd> <dt> <a name='item254'>[254]</a> <a href ="/abs/2409.08786" title="Abstract" id="2409.08786"> arXiv:2409.08786 </a> (replaced) [<a href="/pdf/2409.08786" title="Download PDF" id="pdf-2409.08786" aria-labelledby="pdf-2409.08786">pdf</a>, <a href="https://arxiv.org/html/2409.08786v2" title="View HTML" id="html-2409.08786" aria-labelledby="html-2409.08786" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.08786" title="Other formats" id="oth-2409.08786" aria-labelledby="oth-2409.08786">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Modular Neural Wiretap Codes for Fading Channels </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Seifert,+D">Daniel Seifert</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=G%C3%BCnl%C3%BC,+O">Onur Günlü</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schaefer,+R+F">Rafael F. Schaefer</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Limit performance assessment to constant rate scenarios, add examination of equivocation rate </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span>; Cryptography and Security (cs.CR); Machine Learning (cs.LG) </div> <p class='mathjax'> The wiretap channel is a well-studied problem in the physical layer security literature. Although it is proven that the decoding error probability and information leakage can be made arbitrarily small in the asymptotic regime, further research on finite-blocklength codes is required on the path towards practical, secure communication systems. This work provides the first experimental characterization of a deep learning-based, finite-blocklength code construction for multi-tap fading wiretap channels without channel state information. In addition to the evaluation of the average probability of error and information leakage, we examine the designed codes in the presence of fading in terms of the equivocation rate and illustrate the influence of (i) the number of fading taps, (ii) differing variances of the fading coefficients, and (iii) the seed selection for the hash function-based security layer. 
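</p> <p class='mathjax'> A toy Python sketch of the multi-tap fading channel setting described above, assuming BPSK symbols, complex Gaussian taps, and an arbitrary noise level; the paper's actual code construction and hash-based security layer are not reproduced here: </p> <pre>
# Toy multi-tap fading channel without CSI (illustrative parameters).
import numpy as np

rng = np.random.default_rng(0)
n, taps = 1024, 3
x = 2 * rng.integers(0, 2, n) - 1.0     # BPSK codeword symbols
h = (rng.normal(size=taps) + 1j * rng.normal(size=taps)) / np.sqrt(2 * taps)
y = np.convolve(x, h)[:n]               # multi-tap fading
y += 0.1 * (rng.normal(size=n) + 1j * rng.normal(size=n))  # receiver noise
</pre> <p class='mathjax'>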
</p> </div> </dd> <dt> <a name='item255'>[255]</a> <a href ="/abs/2409.10496" title="Abstract" id="2409.10496"> arXiv:2409.10496 </a> (replaced) [<a href="/pdf/2409.10496" title="Download PDF" id="pdf-2409.10496" aria-labelledby="pdf-2409.10496">pdf</a>, <a href="https://arxiv.org/html/2409.10496v5" title="View HTML" id="html-2409.10496" aria-labelledby="html-2409.10496" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.10496" title="Other formats" id="oth-2409.10496" aria-labelledby="oth-2409.10496">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> MusicLIME: Explainable Multimodal Music Understanding </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sotirou,+T">Theodoros Sotirou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lyberatos,+V">Vassilis Lyberatos</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mastromichalakis,+O+M">Orfeas Menis Mastromichalakis</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stamou,+G">Giorgos Stamou</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> GitHub repository: <a href="https://github.com/IamTheo2000/MusicLIME" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. To be presented at ICASSP 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> Multimodal models are critical for music understanding tasks, as they capture the complex interplay between audio and lyrics. However, as these models become more prevalent, the need for explainability grows: understanding how these systems make decisions is vital for ensuring fairness, reducing bias, and fostering trust. In this paper, we introduce MusicLIME, a model-agnostic feature importance explanation method designed for multimodal music models. Unlike traditional unimodal methods, which analyze each modality separately without considering the interaction between them (often leading to incomplete or misleading explanations), MusicLIME reveals how audio and lyrical features interact and contribute to predictions, providing a holistic view of the model's decision-making. Additionally, we enhance local explanations by aggregating them into global explanations, giving users a broader perspective of model behavior. Through this work, we contribute to improving the interpretability of multimodal music models, empowering users to make informed choices, and fostering more equitable, fair, and transparent music understanding systems. 
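</p> <p class='mathjax'> As a hedged illustration of the LIME-style perturbation idea (not MusicLIME's implementation), here is a Python sketch that fits a linear surrogate over a binary mask spanning both audio and lyric features; the <code>predict</code> interface and sample count are assumptions: </p> <pre>
# LIME-style surrogate over joint audio+lyric feature masks (sketch).
import numpy as np

def explain(predict, n_features, n_samples=500, seed=0):
    """predict: maps a binary on/off mask over all (audio+lyric)
    features to a class probability; returns per-feature weights."""
    rng = np.random.default_rng(seed)
    Z = rng.integers(0, 2, size=(n_samples, n_features)).astype(float)
    yhat = np.array([predict(z) for z in Z])
    # Fit a linear surrogate by least squares (intercept appended).
    A = np.hstack([Z, np.ones((n_samples, 1))])
    coef, *_ = np.linalg.lstsq(A, yhat, rcond=None)
    return coef[:-1]   # importance of each audio/lyric feature
</pre> <p class='mathjax'>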
</p> </div> </dd> <dt> <a name='item256'>[256]</a> <a href ="/abs/2409.13878" title="Abstract" id="2409.13878"> arXiv:2409.13878 </a> (replaced) [<a href="/pdf/2409.13878" title="Download PDF" id="pdf-2409.13878" aria-labelledby="pdf-2409.13878">pdf</a>, <a href="https://arxiv.org/html/2409.13878v2" title="View HTML" id="html-2409.13878" aria-labelledby="html-2409.13878" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.13878" title="Other formats" id="oth-2409.13878" aria-labelledby="oth-2409.13878">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Cross-Domain Knowledge Transfer for Underwater Acoustic Classification Using Pre-trained Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Mohammadi,+A">Amirmohammad Mohammadi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kelhe,+T">Tejashri Kelhe</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Carreiro,+D">Davelle Carreiro</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Van+Dine,+A">Alexandra Van Dine</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peeples,+J">Joshua Peeples</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 6 pages, 4 figures, This work has been submitted to the IEEE for possible publication. Added Grad-CAM analysis. Title changed. This work has been accepted to IEEE OCEANS 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> Transfer learning is commonly employed to leverage large, pre-trained models and perform fine-tuning for downstream tasks. The most prevalent pre-trained models are initially trained using ImageNet. However, their ability to generalize can vary across different data modalities. This study compares pre-trained Audio Neural Networks (PANNs) and ImageNet pre-trained models within the context of underwater acoustic target recognition (UATR). It was observed that the ImageNet pre-trained models slightly outperform pre-trained audio models in passive sonar classification. We also analyzed the impact of audio sampling rates for model pre-training and fine-tuning. This study contributes to transfer learning applications of UATR, illustrating the potential of pre-trained models to address limitations caused by scarce, labeled data in the UATR domain. 
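</p> <p class='mathjax'> A minimal Python sketch of the transfer-learning recipe discussed above: reuse an ImageNet-pretrained backbone and fine-tune a new classification head on spectrogram inputs. The backbone choice, class count, and hyperparameters are illustrative assumptions, not the paper's configuration: </p> <pre>
# ImageNet-to-sonar transfer sketch: pretrained backbone, new head.
import torch, torch.nn as nn
from torchvision.models import resnet18, ResNet18_Weights

model = resnet18(weights=ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, 4)   # e.g. 4 target classes
opt = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.CrossEntropyLoss()

spec = torch.randn(8, 3, 224, 224)   # spectrograms tiled to 3 channels
labels = torch.randint(0, 4, (8,))
loss = loss_fn(model(spec), labels)  # one fine-tuning step
loss.backward(); opt.step()
</pre> <p class='mathjax'>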
</p> </div> </dd> <dt> <a name='item257'>[257]</a> <a href ="/abs/2409.13881" title="Abstract" id="2409.13881"> arXiv:2409.13881 </a> (replaced) [<a href="/pdf/2409.13881" title="Download PDF" id="pdf-2409.13881" aria-labelledby="pdf-2409.13881">pdf</a>, <a href="https://arxiv.org/html/2409.13881v2" title="View HTML" id="html-2409.13881" aria-labelledby="html-2409.13881" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.13881" title="Other formats" id="oth-2409.13881" aria-labelledby="oth-2409.13881">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Investigation of Time-Frequency Feature Combinations with Histogram Layer Time Delay Neural Networks </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Mohammadi,+A">Amirmohammad Mohammadi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Masabarakiza,+I">Iren'e Masabarakiza</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barnes,+E">Ethan Barnes</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Carreiro,+D">Davelle Carreiro</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Van+Dine,+A">Alexandra Van Dine</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Peeples,+J">Joshua Peeples</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 6 pages, 4 figures. This work has been submitted to the IEEE for possible publication. This work has been accepted to IEEE OCEANS 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Sound (cs.SD)</span>; Machine Learning (cs.LG); Audio and Speech Processing (eess.AS) </div> <p class='mathjax'> While deep learning has reduced the prevalence of manual feature extraction, transformation of data via feature engineering remains essential for improving model performance, particularly for underwater acoustic signals. The methods by which audio signals are converted into time-frequency representations and the subsequent handling of these spectrograms can significantly impact performance. This work demonstrates the performance impact of using different combinations of time-frequency features in a histogram layer time delay neural network. An optimal set of features is identified with results indicating that specific feature combinations outperform single data features. 
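</p> <p class='mathjax'> To illustrate what "combinations of time-frequency features" can look like in practice, a Python sketch that stacks two spectrogram variants as input channels; the specific features and STFT settings are assumptions, not the paper's optimal set: </p> <pre>
# Stacking time-frequency representations as input channels (sketch).
import numpy as np
from scipy import signal

fs = 16000
x = np.random.randn(fs)                       # 1 s of synthetic audio
f, t, S = signal.spectrogram(x, fs=fs, nperseg=512, noverlap=256)
log_power = np.log1p(S)                       # feature 1: log spectrogram
f2, t2, Sm = signal.spectrogram(x, fs=fs, nperseg=512, noverlap=256,
                                mode="magnitude")
features = np.stack([log_power, Sm], axis=0)  # (channels, freq, time)
</pre> <p class='mathjax'>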
</p> </div> </dd> <dt> <a name='item258'>[258]</a> <a href ="/abs/2409.17273" title="Abstract" id="2409.17273"> arXiv:2409.17273 </a> (replaced) [<a href="/pdf/2409.17273" title="Download PDF" id="pdf-2409.17273" aria-labelledby="pdf-2409.17273">pdf</a>, <a href="https://arxiv.org/html/2409.17273v3" title="View HTML" id="html-2409.17273" aria-labelledby="html-2409.17273" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.17273" title="Other formats" id="oth-2409.17273" aria-labelledby="oth-2409.17273">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Targeted Neural Architectures in Multi-Objective Frameworks for Complete Glioma Characterization from Multimodal MRI </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Venkatraman,+S">Shravan Venkatraman</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=V,+P">Pandiyaraju V</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=A,+A">Abeshek A</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=A,+A+S">Aravintakshan S A</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=S,+P+K">Pavan Kumar S</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=A,+K">Kannan A</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=S,+M">Madhan S</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 29 pages, 25 figures, 6 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Brain tumors result from abnormal cell growth in brain tissue. If undiagnosed, they cause neurological deficits, including cognitive impairment, motor dysfunction, and sensory loss. As tumors grow, intracranial pressure increases, potentially leading to fatal complications such as brain herniation. Early diagnosis and treatment are crucial to controlling these effects and slowing tumor progression. Deep learning (DL) and artificial intelligence (AI) are increasingly used to assist doctors in early diagnosis through magnetic resonance imaging (MRI) scans. To address this critical issue, our research proposes targeted neural architectures within multi-objective frameworks that can localize, segment, and classify the grade of gliomas from multimodal MRI images. Our localization framework utilizes a targeted architecture that enhances the LinkNet framework with an encoder inspired by VGG19 for better multimodal feature extraction from the tumor, along with spatial and graph attention mechanisms that sharpen feature focus and inter-feature relationships. For the segmentation objective, we deployed a specialized framework using the SeResNet101 CNN model as the encoder backbone integrated into the LinkNet architecture, achieving an IoU Score of 96%. The classification objective is addressed through a distinct framework implemented by combining the SeResNet152 feature extractor with an Adaptive Boosting classifier, reaching an accuracy of 98.53%. Our multi-objective approach with targeted neural architectures demonstrated promising results for complete glioma characterization, with the potential to advance medical AI by enabling early diagnosis and providing more accurate treatment options for patients. 
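</p> <p class='mathjax'> For reference, the IoU score quoted above can be computed for binary segmentation masks as in this minimal Python sketch: </p> <pre>
# IoU (Jaccard index) between binary segmentation masks.
import numpy as np

def iou(pred, target, eps=1e-7):
    pred, target = pred.astype(bool), target.astype(bool)
    inter = np.logical_and(pred, target).sum()
    union = np.logical_or(pred, target).sum()
    return (inter + eps) / (union + eps)
</pre> <p class='mathjax'>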
</p> </div> </dd> <dt> <a name='item259'>[259]</a> <a href ="/abs/2410.01273" title="Abstract" id="2410.01273"> arXiv:2410.01273 </a> (replaced) [<a href="/pdf/2410.01273" title="Download PDF" id="pdf-2410.01273" aria-labelledby="pdf-2410.01273">pdf</a>, <a href="https://arxiv.org/html/2410.01273v2" title="View HTML" id="html-2410.01273" aria-labelledby="html-2410.01273" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.01273" title="Other formats" id="oth-2410.01273" aria-labelledby="oth-2410.01273">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CANVAS: Commonsense-Aware Navigation System for Intuitive Human-Robot Interaction </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Choi,+S">Suhwan Choi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cho,+Y">Yongjun Cho</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+M">Minchan Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jung,+J">Jaeyoon Jung</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Joe,+M">Myunchul Joe</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Park,+Y">Yubeen Park</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+M">Minseo Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+S">Sungwoong Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+S">Sungjae Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Park,+H">Hwiseong Park</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chung,+J">Jiwan Chung</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+Y">Youngjae Yu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to ICRA 2025, project page <a href="https://worv-ai.github.io/canvas" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Real-life robot navigation involves more than just reaching a destination; it requires optimizing movements while addressing scenario-specific goals. An intuitive way for humans to express these goals is through abstract cues like verbal commands or rough sketches. Such human guidance may lack details or be noisy. Nonetheless, we expect robots to navigate as intended. For robots to interpret and execute these abstract instructions in line with human expectations, they must share a common understanding of basic navigation concepts with humans. To this end, we introduce CANVAS, a novel framework that combines visual and linguistic instructions for commonsense-aware navigation. Its success is driven by imitation learning, enabling the robot to learn from human navigation behavior. We present COMMAND, a comprehensive dataset with human-annotated navigation results, spanning over 48 hours and 219 km, designed to train commonsense-aware navigation systems in simulated environments. Our experiments show that CANVAS outperforms the strong rule-based system ROS NavStack across all environments, demonstrating superior performance with noisy instructions. 
Notably, in the orchard environment, where ROS NavStack records a 0% total success rate, CANVAS achieves a total success rate of 67%. CANVAS also closely aligns with human demonstrations and commonsense constraints, even in unseen environments. Furthermore, real-world deployment of CANVAS showcases impressive Sim2Real transfer with a total success rate of 69%, highlighting the potential of learning from human demonstrations in simulated environments for real-world applications. </p> </div> </dd> <dt> <a name='item260'>[260]</a> <a href ="/abs/2410.12819" title="Abstract" id="2410.12819"> arXiv:2410.12819 </a> (replaced) [<a href="/pdf/2410.12819" title="Download PDF" id="pdf-2410.12819" aria-labelledby="pdf-2410.12819">pdf</a>, <a href="https://arxiv.org/html/2410.12819v2" title="View HTML" id="html-2410.12819" aria-labelledby="html-2410.12819" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.12819" title="Other formats" id="oth-2410.12819" aria-labelledby="oth-2410.12819">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Deep Adversarial Learning with Activity-Based User Discrimination Task for Human Activity Recognition </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Calatrava-Nicol%C3%A1s,+F+M">Francisco M. Calatrava-Nicolás</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Miyauchi,+S">Shoko Miyauchi</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Mozos,+O+M">Oscar Martinez Mozos</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Signal Processing (eess.SP)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> We present a new adversarial deep learning framework for the problem of human activity recognition (HAR) using inertial sensors worn by people. Our framework incorporates a novel adversarial activity-based discrimination task that addresses inter-person variability, i.e., the fact that different people perform the same activity in different ways. Overall, our proposed framework outperforms previous approaches on three HAR datasets using a leave-one-(person)-out cross-validation (LOOCV) benchmark. Additional results demonstrate that our discrimination task yields better classification results compared to previous tasks within the same adversarial framework. 
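</p> <p class='mathjax'> One common way to realize an adversarial discrimination task is a gradient-reversal layer, sketched below in PyTorch; this is a generic construction, and the paper's activity-based discrimination task differs in its details: </p> <pre>
# Gradient-reversal layer (generic adversarial-training building block).
import torch

class GradReverse(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, lam):
        ctx.lam = lam
        return x.view_as(x)
    @staticmethod
    def backward(ctx, grad):
        # Flip (and scale) gradients flowing back into the encoder.
        return -ctx.lam * grad, None

def adversarial_features(features, lam=1.0):
    return GradReverse.apply(features, lam)
</pre> <p class='mathjax'>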
</p> </div> </dd> <dt> <a name='item261'>[261]</a> <a href ="/abs/2410.16750" title="Abstract" id="2410.16750"> arXiv:2410.16750 </a> (replaced) [<a href="/pdf/2410.16750" title="Download PDF" id="pdf-2410.16750" aria-labelledby="pdf-2410.16750">pdf</a>, <a href="/format/2410.16750" title="Other formats" id="oth-2410.16750" aria-labelledby="oth-2410.16750">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Theoretical Convergence Guarantees for Variational Autoencoders </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Surendran,+S">Sobihan Surendran</a> (LPSM (UMR\_8001)), <a href="https://arxiv.org/search/stat?searchtype=author&query=Godichon-Baggioni,+A">Antoine Godichon-Baggioni</a> (LPSM (UMR\_8001)), <a href="https://arxiv.org/search/stat?searchtype=author&query=Corff,+S+L">Sylvain Le Corff</a> (LPSM (UMR\_8001), SU)</div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Variational Autoencoders (VAE) are popular generative models used to sample from complex data distributions. Despite their empirical success in various machine learning tasks, significant gaps remain in understanding their theoretical properties, particularly regarding convergence guarantees. This paper aims to bridge that gap by providing non-asymptotic convergence guarantees for VAE trained using both Stochastic Gradient Descent and Adam algorithms. We derive a convergence rate of $\mathcal{O}(\log n / \sqrt{n})$, where $n$ is the number of iterations of the optimization algorithm, with explicit dependencies on the batch size, the number of variational samples, and other key hyperparameters. Our theoretical analysis applies to both Linear VAE and Deep Gaussian VAE, as well as several VAE variants, including $\beta$-VAE and IWAE. Additionally, we empirically illustrate the impact of hyperparameters on convergence, offering new insights into the theoretical understanding of VAE training. 
</p> </div> </dd> <dt> <a name='item262'>[262]</a> <a href ="/abs/2410.21301" title="Abstract" id="2410.21301"> arXiv:2410.21301 </a> (replaced) [<a href="/pdf/2410.21301" title="Download PDF" id="pdf-2410.21301" aria-labelledby="pdf-2410.21301">pdf</a>, <a href="https://arxiv.org/html/2410.21301v2" title="View HTML" id="html-2410.21301" aria-labelledby="html-2410.21301" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.21301" title="Other formats" id="oth-2410.21301" aria-labelledby="oth-2410.21301">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Evaluating the Posterior Sampling Ability of Plug&Play Diffusion Methods in Sparse-View CT </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Moroy,+L">Liam Moroy</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Bourmaud,+G">Guillaume Bourmaud</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Champagnat,+F">Frédéric Champagnat</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Giovannelli,+J">Jean-François Giovannelli</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Plug&Play (PnP) diffusion models are state-of-the-art methods in computed tomography (CT) reconstruction. Such methods usually consider applications where the sinogram contains a sufficient amount of information for the posterior distribution to be concentrated around a single mode, and consequently are evaluated using image-to-image metrics such as PSNR/SSIM. Instead, we are interested in reconstructing compressible flow images from sinograms having a small number of projections, which results in a posterior distribution no longer concentrated or even multimodal. Thus, in this paper, we aim at evaluating the approximate posterior of PnP diffusion models and introduce two posterior evaluation properties. We quantitatively evaluate three PnP diffusion methods on three different datasets for several numbers of projections. We surprisingly find that, for each method, the approximate posterior deviates from the true posterior when the number of projections decreases. 
</p> </div> </dd> <dt> <a name='item263'>[263]</a> <a href ="/abs/2411.04653" title="Abstract" id="2411.04653"> arXiv:2411.04653 </a> (replaced) [<a href="/pdf/2411.04653" title="Download PDF" id="pdf-2411.04653" aria-labelledby="pdf-2411.04653">pdf</a>, <a href="https://arxiv.org/html/2411.04653v2" title="View HTML" id="html-2411.04653" aria-labelledby="html-2411.04653" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.04653" title="Other formats" id="oth-2411.04653" aria-labelledby="oth-2411.04653">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> IGDrivSim: A Benchmark for the Imitation Gap in Autonomous Driving </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Grislain,+C">Clémence Grislain</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vuorio,+R">Risto Vuorio</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+C">Cong Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Whiteson,+S">Shimon Whiteson</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 8 pages, 4 figures, 1 table </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Developing autonomous vehicles that can navigate complex environments with human-level safety and efficiency is a central goal in self-driving research. A common approach to achieving this is imitation learning, where agents are trained to mimic human expert demonstrations collected from real-world driving scenarios. However, discrepancies between human perception and the self-driving car's sensors can introduce an $\textit{imitation}$ gap, leading to imitation learning failures. In this work, we introduce $\textbf{IGDrivSim}$, a benchmark built on top of the Waymax simulator, designed to investigate the effects of the imitation gap in learning autonomous driving policy from human expert demonstrations. Our experiments show that this perception gap between human experts and self-driving agents can hinder the learning of safe and effective driving behaviors. We further show that combining imitation with reinforcement learning, using a simple penalty reward for prohibited behaviors, effectively mitigates these failures. Our code is open-sourced at: <a href="https://github.com/clemgris/IGDrivSim.git" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
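</p> <p class='mathjax'> The penalty-reward idea described above can be sketched as simple reward shaping; the penalty magnitude and the notion of "prohibited" behavior are assumptions for illustration: </p> <pre>
# Reward shaping that penalizes prohibited behaviors during RL
# fine-tuning of an imitation-learned policy (illustrative values).
def shaped_reward(env_reward: float, prohibited: bool,
                  penalty: float = 1.0) -> float:
    # Imitation supplies the initial policy; RL then maximizes this
    # shaped reward, discouraging prohibited behaviors.
    return env_reward - (penalty if prohibited else 0.0)
</pre> <p class='mathjax'>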
</p> </div> </dd> <dt> <a name='item264'>[264]</a> <a href ="/abs/2411.06601" title="Abstract" id="2411.06601"> arXiv:2411.06601 </a> (replaced) [<a href="/pdf/2411.06601" title="Download PDF" id="pdf-2411.06601" aria-labelledby="pdf-2411.06601">pdf</a>, <a href="https://arxiv.org/html/2411.06601v3" title="View HTML" id="html-2411.06601" aria-labelledby="html-2411.06601" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.06601" title="Other formats" id="oth-2411.06601" aria-labelledby="oth-2411.06601">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> OffLight: An Offline Multi-Agent Reinforcement Learning Framework for Traffic Signal Control </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bokade,+R">Rohit Bokade</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+X">Xiaoning Jin</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Machine Learning (cs.LG); Multiagent Systems (cs.MA) </div> <p class='mathjax'> Efficient traffic signal control (TSC) is essential for urban mobility, but traditional systems struggle to handle the complexity of real-world traffic. Multi-agent Reinforcement Learning (MARL) offers adaptive solutions, but online MARL requires extensive interactions with the environment, making it costly and impractical. Offline MARL mitigates these challenges by using historical traffic data for training but faces significant difficulties with heterogeneous behavior policies in real-world datasets, where mixed-quality data complicates learning. We introduce OffLight, a novel offline MARL framework designed to handle heterogeneous behavior policies in TSC datasets. To improve learning efficiency, OffLight incorporates Importance Sampling (IS) to correct for distributional shifts and Return-Based Prioritized Sampling (RBPS) to focus on high-quality experiences. OffLight utilizes a Gaussian Mixture Variational Graph Autoencoder (GMM-VGAE) to capture the diverse distribution of behavior policies from local observations. Extensive experiments across real-world urban traffic scenarios show that OffLight outperforms existing offline RL methods, achieving up to a 7.8% reduction in average travel time and 11.2% decrease in queue length. Ablation studies confirm the effectiveness of OffLight's components in handling heterogeneous data and improving policy performance. These results highlight OffLight's scalability and potential to improve urban traffic management without the risks of online learning. </p> </div> </dd> <dt> <a name='item265'>[265]</a> <a href ="/abs/2411.12730" title="Abstract" id="2411.12730"> arXiv:2411.12730 </a> (replaced) [<a href="/pdf/2411.12730" title="Download PDF" id="pdf-2411.12730" aria-labelledby="pdf-2411.12730">pdf</a>, <a href="/format/2411.12730" title="Other formats" id="oth-2411.12730" aria-labelledby="oth-2411.12730">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Testing classical properties from quantum data </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Caro,+M+C">Matthias C. 
Caro</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Naik,+P">Preksha Naik</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Slote,+J">Joseph Slote</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 34 + 2 pages, 2 tables, 1 figure </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Computational Complexity (cs.CC); Data Structures and Algorithms (cs.DS); Machine Learning (cs.LG) </div> <p class='mathjax'> Properties of Boolean functions can often be tested much faster than the functions can be learned. However, this advantage usually disappears when testers are limited to random samples of a function $f$--a natural setting for data science--rather than queries. In this work we initiate the study of a quantum version of this "data science scenario": quantum algorithms that test properties of $f$ solely from quantum data in the form of copies of the function state $|f\rangle \propto \sum_x|x,f(x)\rangle$. <br>$\bullet$ New tests. For three well-established properties--monotonicity, symmetry, and triangle-freeness--we show that the speedup lost when restricting classical testers to sampled data can be recovered by quantum algorithms operating solely from quantum data. <br>$\bullet$ Inadequacy of Fourier sampling. Our new testers use techniques beyond quantum Fourier sampling, and we show that this is necessary. In particular, there is no constant-complexity tester for symmetry relying solely on Fourier sampling and random classical samples. <br>$\bullet$ Classical queries vs. quantum data. We exhibit a testing problem that can be solved from $O(1)$ classical queries but that requires $\Omega(2^{n/2})$ function state copies. The Forrelation problem provides a separation of the same magnitude in the opposite direction, so we conclude that quantum data and classical queries are "maximally incomparable" resources for testing. <br>$\bullet$ Towards lower bounds. We also begin the study of lower bounds for testing from quantum data. For quantum monotonicity testing, we prove that the ensembles of Goldreich et al. (2000) and Black (2023), which give exponential lower bounds for classical sample-based testing, do not yield any nontrivial lower bounds for testing from quantum data. New insights specific to quantum data will be required for proving copy complexity lower bounds for testing in this model. 
</p> </div> </dd> <dt> <a name='item266'>[266]</a> <a href ="/abs/2411.16156" title="Abstract" id="2411.16156"> arXiv:2411.16156 </a> (replaced) [<a href="/pdf/2411.16156" title="Download PDF" id="pdf-2411.16156" aria-labelledby="pdf-2411.16156">pdf</a>, <a href="https://arxiv.org/html/2411.16156v2" title="View HTML" id="html-2411.16156" aria-labelledby="html-2411.16156" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2411.16156" title="Other formats" id="oth-2411.16156" aria-labelledby="oth-2411.16156">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> VideoOrion: Tokenizing Object Dynamics in Videos </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Feng,+Y">Yicheng Feng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yijiang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+W">Wanpeng Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+H">Hao Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yue,+Z">Zihao Yue</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+S">Sipeng Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lu,+Z">Zongqing Lu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> We present VideoOrion, a Video Large Language Model (Video-LLM) that explicitly captures the key semantic information in videos - the spatial-temporal dynamics of objects throughout the videos. VideoOrion employs expert vision models to extract object dynamics through a detect-segment-track pipeline, encoding them into a set of object tokens by aggregating spatial-temporal object features. Our method addresses the persistent challenge in Video-LLMs of efficiently compressing high-dimensional video data into semantic tokens that are comprehensible to LLMs. Compared to prior methods which resort to downsampling the original video or aggregating visual tokens using resamplers, leading to information loss and entangled semantics, VideoOrion not only offers a more natural and efficient way to derive compact, disentangled semantic representations but also enables explicit object modeling of video content with minimal computational cost. Moreover, the introduced object tokens naturally allow VideoOrion to accomplish video-based referring tasks. Experimental results show that VideoOrion can learn to make good use of the object tokens, and achieves competitive results on both general video question answering and video-based referring benchmarks. 
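</p> <p class='mathjax'> As a simplified stand-in for the detect-segment-track aggregation described above, a PyTorch sketch that pools per-frame features under one object's soft masks into a single object token; the real pipeline uses expert vision models to produce the masks, which are simply given here: </p> <pre>
# Mask-weighted spatio-temporal pooling into one object token (sketch).
import torch

def object_token(frame_feats, masks):
    """frame_feats: (T, C, H, W) visual features; masks: (T, H, W)
    soft masks for one tracked object. Returns a (C,) object token."""
    w = masks.unsqueeze(1)                         # (T, 1, H, W)
    pooled = (frame_feats * w).sum(dim=(0, 2, 3)) / w.sum().clamp_min(1e-6)
    return pooled                                  # spatio-temporal average
</pre> <p class='mathjax'>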
</p> </div> </dd> <dt> <a name='item267'>[267]</a> <a href ="/abs/2412.08116" title="Abstract" id="2412.08116"> arXiv:2412.08116 </a> (replaced) [<a href="/pdf/2412.08116" title="Download PDF" id="pdf-2412.08116" aria-labelledby="pdf-2412.08116">pdf</a>, <a href="https://arxiv.org/html/2412.08116v2" title="View HTML" id="html-2412.08116" aria-labelledby="html-2412.08116" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2412.08116" title="Other formats" id="oth-2412.08116" aria-labelledby="oth-2412.08116">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Diffusion-based Data Augmentation and Knowledge Distillation with Generated Soft Labels Solving Data Scarcity Problems of SAR Oil Spill Segmentation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Moon,+J">Jaeho Moon</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yun,+J">Jeonghwan Yun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+J">Jaehyun Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+J">Jaehyup Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+M">Munchurl Kim</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Oil spills pose severe environmental risks, making early detection crucial for effective response and mitigation. As Synthetic Aperture Radar (SAR) images operate under all-weather conditions, SAR-based oil spill segmentation enables fast and robust monitoring. However, when using deep learning models, SAR oil spill segmentation often struggles in training due to the scarcity of labeled data. To address this limitation, we propose a diffusion-based data augmentation with knowledge transfer (DAKTer) strategy. Our DAKTer strategy enables a diffusion model to generate SAR oil spill images along with soft label pairs, which offer richer class probability distributions than segmentation masks (i.e., hard labels). Also, for reliable joint generation of high-quality SAR images and well-aligned soft labels, we introduce an SNR-based balancing factor aligning the noise corruption process of both modalities in diffusion models. By leveraging the generated SAR images and soft labels, a student segmentation model can learn robust feature representations without teacher models trained for the same task, improving its ability to segment oil spill regions. Extensive experiments demonstrate that our DAKTer strategy effectively transfers the knowledge of per-pixel class probabilities to the student segmentation model to distinguish the oil spill regions from other look-alike regions in the SAR images. Our DAKTer strategy boosts various segmentation models to achieve superior performance with large margins compared to other generative data augmentation methods. 
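</p> <p class='mathjax'> A generic forward-noising sketch for jointly corrupting an image and its soft label, with a hypothetical scalar <code>gamma</code> standing in for a balancing factor; the paper's SNR-based factor is derived differently, so this is only an illustration of the joint-corruption idea: </p> <pre>
# Joint forward-diffusion noising of two modalities (illustrative).
import torch

def noise_pair(img, soft_label, alpha_bar_t, gamma=0.5):
    """alpha_bar_t: scalar tensor from the noise schedule;
    gamma: hypothetical factor rescaling the label's noise."""
    e1, e2 = torch.randn_like(img), torch.randn_like(soft_label)
    img_t = alpha_bar_t.sqrt() * img + (1 - alpha_bar_t).sqrt() * e1
    lab_t = (alpha_bar_t.sqrt() * soft_label
             + gamma * (1 - alpha_bar_t).sqrt() * e2)
    return img_t, lab_t
</pre> <p class='mathjax'>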
</p> </div> </dd> <dt> <a name='item268'>[268]</a> <a href ="/abs/2412.08897" title="Abstract" id="2412.08897"> arXiv:2412.08897 </a> (replaced) [<a href="/pdf/2412.08897" title="Download PDF" id="pdf-2412.08897" aria-labelledby="pdf-2412.08897">pdf</a>, <a href="https://arxiv.org/html/2412.08897v2" title="View HTML" id="html-2412.08897" aria-labelledby="html-2412.08897" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2412.08897" title="Other formats" id="oth-2412.08897" aria-labelledby="oth-2412.08897">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Neural Interactive Proofs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hammond,+L">Lewis Hammond</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Adam-Day,+S">Sam Adam-Day</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> ICLR'25 camera-ready version; 51 pages, 17 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> We consider the problem of how a trusted, but computationally bounded agent (a 'verifier') can learn to interact with one or more powerful but untrusted agents ('provers') in order to solve a given task. More specifically, we study the case in which agents are represented using neural networks and refer to solutions of this problem as neural interactive proofs. First we introduce a unifying framework based on prover-verifier games, which generalises previously proposed interaction protocols. We then describe several new protocols for generating neural interactive proofs, and provide a theoretical comparison of both new and existing approaches. Finally, we support this theory with experiments in two domains: a toy graph isomorphism problem that illustrates the key ideas, and a code validation task using large language models. In so doing, we aim to create a foundation for future work on neural interactive proofs and their application in building safer AI systems. 
</p> </div> </dd> <dt> <a name='item269'>[269]</a> <a href ="/abs/2412.08976" title="Abstract" id="2412.08976"> arXiv:2412.08976 </a> (replaced) [<a href="/pdf/2412.08976" title="Download PDF" id="pdf-2412.08976" aria-labelledby="pdf-2412.08976">pdf</a>, <a href="https://arxiv.org/html/2412.08976v2" title="View HTML" id="html-2412.08976" aria-labelledby="html-2412.08976" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2412.08976" title="Other formats" id="oth-2412.08976" aria-labelledby="oth-2412.08976">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Identity-Preserving Pose-Guided Character Animation via Facial Landmarks Transformation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Mu,+L">Lianrui Mu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+X">Xingze Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zheng,+W">Wenjie Zheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ye,+J">Jiangnan Ye</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hu,+H">Haoji Hu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 6 pages, 4 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Creating realistic pose-guided image-to-video character animations while preserving facial identity remains challenging, especially in complex and dynamic scenarios such as dancing, where precise identity consistency is crucial. Existing methods frequently encounter difficulties maintaining facial coherence due to misalignments between facial landmarks extracted from driving videos that provide head pose and expression cues and the facial geometry of the reference images. To address this limitation, we introduce the Facial Landmarks Transformation (FLT) method, which leverages a 3D Morphable Model. FLT converts 2D landmarks into a 3D face model, adjusts the 3D face model to align with the reference identity, and then projects the result back into 2D landmarks to guide the image-to-video generation process. This approach ensures accurate alignment with the reference facial geometry, enhancing the consistency between generated videos and reference images. Experimental results demonstrate that FLT effectively preserves facial identity, significantly improving pose-guided character animation models. 
</p> </div> </dd> <dt> <a name='item270'>[270]</a> <a href ="/abs/2412.13769" title="Abstract" id="2412.13769"> arXiv:2412.13769 </a> (replaced) [<a href="/pdf/2412.13769" title="Download PDF" id="pdf-2412.13769" aria-labelledby="pdf-2412.13769">pdf</a>, <a href="https://arxiv.org/html/2412.13769v2" title="View HTML" id="html-2412.13769" aria-labelledby="html-2412.13769" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2412.13769" title="Other formats" id="oth-2412.13769" aria-labelledby="oth-2412.13769">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> QuLTSF: Long-Term Time Series Forecasting with Quantum Machine Learning </div> <div class='list-authors'><a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Chittoor,+H+H+S">Hari Hara Suthan Chittoor</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Griffin,+P+R">Paul Robert Griffin</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Neufeld,+A">Ariel Neufeld</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Thompson,+J">Jayne Thompson</a>, <a href="https://arxiv.org/search/quant-ph?searchtype=author&query=Gu,+M">Mile Gu</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published in ICAART 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Quantum Physics (quant-ph)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Long-term time series forecasting (LTSF) involves predicting a large number of future values of a time series based on its past values. This is an essential task in a wide range of domains, including weather forecasting, stock market analysis, and disease outbreak prediction. Over the decades, LTSF algorithms have transitioned from statistical models to deep learning models such as transformers. Despite the complex architecture of transformer-based LTSF models, 'Are Transformers Effective for Time Series Forecasting?' (Zeng et al., 2023) showed that simple linear models can outperform state-of-the-art transformer-based LTSF models. Recently, quantum machine learning (QML) has been evolving as a domain to enhance the capabilities of classical machine learning models. In this paper, we initiate the application of QML to LTSF problems by proposing QuLTSF, a simple hybrid QML model for multivariate LTSF. Through extensive experiments on a widely used weather dataset, we show the advantages of QuLTSF over state-of-the-art classical linear models in terms of reduced mean squared error and mean absolute error. 
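A hybrid quantum-classical forecaster of the general kind described can be sketched with PennyLane's Torch integration. The circuit, layer sizes, and window lengths below are illustrative assumptions, not QuLTSF's actual architecture.

```python
# Hedged sketch of a hybrid quantum-classical forecaster: classical layers
# compress the lookback window, a variational circuit acts as a feature map,
# and a classical head emits the forecast.
import pennylane as qml
import torch
import torch.nn as nn

n_qubits, lookback, horizon = 4, 96, 24
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, interface="torch")
def circuit(inputs, weights):
    qml.AngleEmbedding(inputs, wires=range(n_qubits))
    qml.StronglyEntanglingLayers(weights, wires=range(n_qubits))
    return [qml.expval(qml.PauliZ(w)) for w in range(n_qubits)]

weight_shapes = {"weights": qml.StronglyEntanglingLayers.shape(n_layers=2, n_wires=n_qubits)}
model = nn.Sequential(
    nn.Linear(lookback, n_qubits),               # classical compression
    qml.qnn.TorchLayer(circuit, weight_shapes),  # quantum feature map
    nn.Linear(n_qubits, horizon),                # classical readout
)

x = torch.randn(8, lookback)                     # batch of past windows
print(model(x).shape)                            # -> torch.Size([8, 24])
```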
</p> </div> </dd> <dt> <a name='item271'>[271]</a> <a href ="/abs/2412.14295" title="Abstract" id="2412.14295"> arXiv:2412.14295 </a> (replaced) [<a href="/pdf/2412.14295" title="Download PDF" id="pdf-2412.14295" aria-labelledby="pdf-2412.14295">pdf</a>, <a href="https://arxiv.org/html/2412.14295v2" title="View HTML" id="html-2412.14295" aria-labelledby="html-2412.14295" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2412.14295" title="Other formats" id="oth-2412.14295" aria-labelledby="oth-2412.14295">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Temporally Consistent Object-Centric Learning by Contrasting Slots </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Manasyan,+A">Anna Manasyan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Seitzer,+M">Maximilian Seitzer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Radovic,+F">Filip Radovic</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Martius,+G">Georg Martius</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zadaianchuk,+A">Andrii Zadaianchuk</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Published at CVPR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Robotics (cs.RO) </div> <p class='mathjax'> Unsupervised object-centric learning from videos is a promising approach to extract structured representations from large, unlabeled collections of videos. To support downstream tasks like autonomous control, these representations must be both compositional and temporally consistent. Existing approaches based on recurrent processing often lack long-term stability across frames because their training objective does not enforce temporal consistency. In this work, we introduce a novel object-level temporal contrastive loss for video object-centric models that explicitly promotes temporal consistency. Our method significantly improves the temporal consistency of the learned object-centric representations, yielding more reliable video decompositions that facilitate challenging downstream tasks such as unsupervised object dynamics prediction. Furthermore, the inductive bias added by our loss strongly improves object discovery, leading to state-of-the-art results on both synthetic and real-world datasets, outperforming even weakly-supervised methods that leverage motion masks as additional cues. 
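One concrete way to realize an object-level temporal contrastive loss is an InfoNCE objective that matches each slot at frame t with the same slot index at frame t+1, using the other slots as negatives. The sketch below is a plausible instantiation under that assumption; the paper's exact positive/negative construction may differ.

```python
# Minimal object-level temporal contrastive loss: slot k at frame t should be
# closer to slot k at frame t+1 than to any other slot (InfoNCE over slots).
import torch
import torch.nn.functional as F

def slot_contrastive_loss(slots_t, slots_t1, temperature=0.1):
    """slots_t, slots_t1: (B, K, D) slot vectors from consecutive frames."""
    B, K, D = slots_t.shape
    a = F.normalize(slots_t, dim=-1)
    b = F.normalize(slots_t1, dim=-1)
    logits = torch.bmm(a, b.transpose(1, 2)) / temperature   # (B, K, K)
    target = torch.arange(K, device=slots_t.device).expand(B, K)
    return F.cross_entropy(logits.reshape(B * K, K), target.reshape(B * K))

loss = slot_contrastive_loss(torch.randn(2, 7, 64), torch.randn(2, 7, 64))
```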
</p> </div> </dd> <dt> <a name='item272'>[272]</a> <a href ="/abs/2412.19529" title="Abstract" id="2412.19529"> arXiv:2412.19529 </a> (replaced) [<a href="/pdf/2412.19529" title="Download PDF" id="pdf-2412.19529" aria-labelledby="pdf-2412.19529">pdf</a>, <a href="https://arxiv.org/html/2412.19529v3" title="View HTML" id="html-2412.19529" aria-labelledby="html-2412.19529" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2412.19529" title="Other formats" id="oth-2412.19529" aria-labelledby="oth-2412.19529">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Nonconvex Stochastic Optimization under Heavy-Tailed Noises: Optimal Convergence without Gradient Clipping </div> <div class='list-authors'><a href="https://arxiv.org/search/math?searchtype=author&query=Liu,+Z">Zijian Liu</a>, <a href="https://arxiv.org/search/math?searchtype=author&query=Zhou,+Z">Zhengyuan Zhou</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> ICLR 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Optimization and Control (math.OC)</span>; Machine Learning (cs.LG); Machine Learning (stat.ML) </div> <p class='mathjax'> Recently, the study of heavy-tailed noises in first-order nonconvex stochastic optimization has received much attention, since heavy tails are recognized as a more realistic condition, as suggested by many empirical observations. Specifically, the stochastic noise (the difference between the stochastic and true gradient) is considered to have only a finite $\mathfrak{p}$-th moment where $\mathfrak{p}\in\left(1,2\right]$, instead of assuming it always satisfies the classical finite variance assumption. To deal with this more challenging setting, different algorithms have been proposed and proven to converge at an optimal $\mathcal{O}(T^{\frac{1-\mathfrak{p}}{3\mathfrak{p}-2}})$ rate for smooth objectives after $T$ iterations. Notably, all these newly designed algorithms are based on the same technique - gradient clipping. Naturally, one may want to know whether the clipping method is a necessary ingredient and the only way to guarantee convergence under heavy-tailed noises. In this work, by revisiting the existing Batched Normalized Stochastic Gradient Descent with Momentum (Batched NSGDM) algorithm, we provide the first convergence result under heavy-tailed noises but without gradient clipping. Concretely, we prove that Batched NSGDM can achieve the optimal $\mathcal{O}(T^{\frac{1-\mathfrak{p}}{3\mathfrak{p}-2}})$ rate even under the relaxed smoothness condition. More interestingly, we also establish the first $\mathcal{O}(T^{\frac{1-\mathfrak{p}}{2\mathfrak{p}}})$ convergence rate in the case where the tail index $\mathfrak{p}$ is unknown in advance, which is arguably the common scenario in practice. 
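For reference, the NSGDM update itself is short: average a batch of stochastic gradients, update the momentum buffer, then step along the normalized momentum, with no clipping anywhere. Step sizes and constants in this sketch are illustrative assumptions.

```python
# Sketch of Batched Normalized SGD with Momentum (NSGDM): the step direction
# is the *normalized* momentum, so heavy-tailed gradient spikes cannot blow
# up the update -- and no gradient clipping is used.
import torch

def batched_nsgdm(params, grad_batch_fn, steps=500, lr=1e-2, beta=0.9):
    m = [torch.zeros_like(p) for p in params]
    for t in range(steps):
        grads = grad_batch_fn(params)                 # averaged batch gradient
        for p, g, mi in zip(params, grads, m):
            mi.mul_(beta).add_(g, alpha=1.0 - beta)   # momentum update
            p.data.add_(mi / (mi.norm() + 1e-12), alpha=-lr)  # normalized step
    return params

# Toy usage: minimize ||p||^2 from noisy gradients 2p + noise.
p = [torch.randn(5)]
batched_nsgdm(p, lambda ps: [2 * ps[0] + torch.randn(5)])
```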
</p> </div> </dd> <dt> <a name='item273'>[273]</a> <a href ="/abs/2412.20173" title="Abstract" id="2412.20173"> arXiv:2412.20173 </a> (replaced) [<a href="/pdf/2412.20173" title="Download PDF" id="pdf-2412.20173" aria-labelledby="pdf-2412.20173">pdf</a>, <a href="https://arxiv.org/html/2412.20173v3" title="View HTML" id="html-2412.20173" aria-labelledby="html-2412.20173" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2412.20173" title="Other formats" id="oth-2412.20173" aria-labelledby="oth-2412.20173">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Debiased Nonparametric Regression for Statistical Inference and Distributional Robustness </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Kato,+M">Masahiro Kato</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Methodology (stat.ME)</span>; Machine Learning (cs.LG); Econometrics (econ.EM); Statistics Theory (math.ST); Machine Learning (stat.ML) </div> <p class='mathjax'> This study proposes a debiasing method for smooth nonparametric estimators. While machine learning techniques such as random forests and neural networks have demonstrated strong predictive performance, their theoretical properties remain relatively underexplored. In particular, many modern algorithms lack guarantees of pointwise and uniform risk convergence, as well as asymptotic normality. These properties are essential for statistical inference and robust estimation and have been well-established for classical methods such as Nadaraya-Watson regression. To ensure these properties for various nonparametric regression estimators, we introduce a model-free debiasing method. By incorporating a correction term that estimates the conditional expected residual of the original estimator, or equivalently, its estimation error, into the initial nonparametric regression estimator, we obtain a debiased estimator that satisfies pointwise and uniform risk convergence, along with asymptotic normality, under mild smoothness conditions. These properties facilitate statistical inference and enhance robustness to covariate shift, making the method broadly applicable to a wide range of nonparametric regression problems. 
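The debiasing recipe is mechanical enough to sketch: fit any base regressor, estimate its conditional expected residual with a classical smoother (Nadaraya-Watson below), and add the correction back. Sample splitting between the two stages is omitted here for brevity, so treat this as a schematic rather than the paper's estimator.

```python
# Debiasing sketch: base regressor + kernel-smoothed residual correction.
import numpy as np
from sklearn.ensemble import RandomForestRegressor

def nadaraya_watson(x_train, r_train, x_eval, h=0.3):
    """Kernel-smoothed residuals r at x_eval (1-D covariate, Gaussian kernel)."""
    w = np.exp(-0.5 * ((x_eval[:, None] - x_train[None, :]) / h) ** 2)
    return (w * r_train[None, :]).sum(1) / (w.sum(1) + 1e-12)

rng = np.random.default_rng(0)
x = rng.uniform(-2, 2, 500)
y = np.sin(2 * x) + rng.normal(0, 0.3, 500)

base = RandomForestRegressor(n_estimators=100, random_state=0).fit(x[:, None], y)
residuals = y - base.predict(x[:, None])           # estimation error of the base fit

x_eval = np.linspace(-2, 2, 50)
debiased = base.predict(x_eval[:, None]) + nadaraya_watson(x, residuals, x_eval)
```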
</p> </div> </dd> <dt> <a name='item274'>[274]</a> <a href ="/abs/2501.00513" title="Abstract" id="2501.00513"> arXiv:2501.00513 </a> (replaced) [<a href="/pdf/2501.00513" title="Download PDF" id="pdf-2501.00513" aria-labelledby="pdf-2501.00513">pdf</a>, <a href="/format/2501.00513" title="Other formats" id="oth-2501.00513" aria-labelledby="oth-2501.00513">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CaReBench: A Fine-Grained Benchmark for Video Captioning and Retrieval </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+Y">Yifan Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xinhao Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+Y">Yichun Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Meng,+D">Desen Meng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+R">Rui Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+L">Limin Wang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Information Retrieval (cs.IR); Machine Learning (cs.LG) </div> <p class='mathjax'> Video understanding, including video captioning and retrieval, is still a great challenge for video-language models (VLMs). Existing video retrieval and captioning benchmarks only include short descriptions, which limits their ability to evaluate detailed video understanding. To address this problem, we present CaReBench, a testing benchmark for fine-grained video captioning and retrieval with 1,000 high-quality pairs of videos and human-annotated detailed captions. Uniquely, it provides manually separated spatial annotations and temporal annotations for each video. Based on this design, we introduce two evaluation metrics, ReBias and CapST, specifically tailored for video retrieval and video captioning tasks, respectively. These metrics enable a comprehensive investigation into the spatial and temporal biases inherent in VLMs. In addition, to handle both video retrieval and video captioning tasks in a unified framework, we develop a simple baseline based on a Multimodal Language Model (MLLM). By implementing a two-stage Supervised Fine-Tuning (SFT), we fully unlock the potential of the MLLM, enabling it not only to generate detailed video descriptions but also to extract video features. Surprisingly, experimental results demonstrate that, compared to CLIP-based models designed for retrieval and popular MLLMs skilled in video captioning, our baseline shows competitive performance in both fine-grained video retrieval and detailed video captioning. 
</p> </div> </dd> <dt> <a name='item275'>[275]</a> <a href ="/abs/2501.03575" title="Abstract" id="2501.03575"> arXiv:2501.03575 </a> (replaced) [<a href="/pdf/2501.03575" title="Download PDF" id="pdf-2501.03575" aria-labelledby="pdf-2501.03575">pdf</a>, <a href="https://arxiv.org/html/2501.03575v2" title="View HTML" id="html-2501.03575" aria-labelledby="html-2501.03575" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2501.03575" title="Other formats" id="oth-2501.03575" aria-labelledby="oth-2501.03575">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Cosmos World Foundation Model Platform for Physical AI </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=NVIDIA">NVIDIA</a>: <a href="https://arxiv.org/search/cs?searchtype=author&query=Agarwal,+N">Niket Agarwal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ali,+A">Arslan Ali</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bala,+M">Maciej Bala</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Balaji,+Y">Yogesh Balaji</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barker,+E">Erik Barker</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cai,+T">Tiffany Cai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chattopadhyay,+P">Prithvijit Chattopadhyay</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+Y">Yongxin Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cui,+Y">Yin Cui</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ding,+Y">Yifan Ding</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dworakowski,+D">Daniel Dworakowski</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fan,+J">Jiaojiao Fan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fenzi,+M">Michele Fenzi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ferroni,+F">Francesco Ferroni</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fidler,+S">Sanja Fidler</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fox,+D">Dieter Fox</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ge,+S">Songwei Ge</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ge,+Y">Yunhao Ge</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gu,+J">Jinwei Gu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gururani,+S">Siddharth Gururani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+E">Ethan He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+J">Jiahui Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huffman,+J">Jacob Huffman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jannaty,+P">Pooya Jannaty</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+J">Jingyi Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+S+W">Seung Wook Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kl%C3%A1r,+G">Gergely Klár</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lam,+G">Grace Lam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lan,+S">Shiyi Lan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Leal-Taixe,+L">Laura Leal-Taixe</a>, <a
href="https://arxiv.org/search/cs?searchtype=author&query=Li,+A">Anqi Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Z">Zhaoshuo Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+C">Chen-Hsuan Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+T">Tsung-Yi Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ling,+H">Huan Ling</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+M">Ming-Yu Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+X">Xian Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Luo,+A">Alice Luo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ma,+Q">Qianli Ma</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mao,+H">Hanzi Mao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mo,+K">Kaichun Mo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mousavian,+A">Arsalan Mousavian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nah,+S">Seungjun Nah</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Niverty,+S">Sriharsha Niverty</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Page,+D">David Page</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Paschalidou,+D">Despoina Paschalidou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Patel,+Z">Zeeshan Patel</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pavao,+L">Lindsey Pavao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ramezanali,+M">Morteza Ramezanali</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Reda,+F">Fitsum Reda</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ren,+X">Xiaowei Ren</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sabavat,+V+R+N">Vasanth Rao Naik Sabavat</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Schmerling,+E">Ed Schmerling</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+S">Stella Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stefaniak,+B">Bartosz Stefaniak</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+S">Shitao Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tchapmi,+L">Lyne Tchapmi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tredak,+P">Przemek Tredak</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tseng,+W">Wei-Cheng Tseng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Varghese,+J">Jibin Varghese</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Hao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Haoxiang Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+H">Heng Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+T">Ting-Chun Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+F">Fangyin Wei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wei,+X">Xinyue Wei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+J+Z">Jay Zhangjie Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+J">Jiashu Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+W">Wei Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yen-Chen,+L">Lin 
Yen-Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+X">Xiaohui Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+Y">Yu Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+J">Jing Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Q">Qinsheng Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yuxuan Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Q">Qingqing Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zolkowski,+A">Artur Zolkowski</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG); Robotics (cs.RO) </div> <p class='mathjax'> Physical AI needs to be trained digitally first. It needs a digital twin of itself, the policy model, and a digital twin of the world, the world model. In this paper, we present the Cosmos World Foundation Model Platform to help developers build customized world models for their Physical AI setups. We position a world foundation model as a general-purpose world model that can be fine-tuned into customized world models for downstream applications. Our platform covers a video curation pipeline, pre-trained world foundation models, examples of post-training of pre-trained world foundation models, and video tokenizers. To help Physical AI builders solve the most critical problems of our society, we make Cosmos open-source and our models open-weight with permissive licenses available via <a href="https://github.com/nvidia-cosmos/cosmos-predict1" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item276'>[276]</a> <a href ="/abs/2501.09129" title="Abstract" id="2501.09129"> arXiv:2501.09129 </a> (replaced) [<a href="/pdf/2501.09129" title="Download PDF" id="pdf-2501.09129" aria-labelledby="pdf-2501.09129">pdf</a>, <a href="https://arxiv.org/html/2501.09129v2" title="View HTML" id="html-2501.09129" aria-labelledby="html-2501.09129" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2501.09129" title="Other formats" id="oth-2501.09129" aria-labelledby="oth-2501.09129">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Deep Self-Supervised Disturbance Mapping with the OPERA Sentinel-1 Radiometric Terrain Corrected SAR Backscatter Product </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Hardiman-Mostow,+H">Harris Hardiman-Mostow</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Marshak,+C">Charles Marshak</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Handwerger,+A+L">Alexander L. Handwerger</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 19 pages, 18 figures, 5 tables. Preprint. Submitted to JSTARS. Revised figures, clarifications, added references </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computer Vision and Pattern Recognition (cs.CV)</span>; Machine Learning (cs.LG); Image and Video Processing (eess.IV) </div> <p class='mathjax'> Mapping land surface disturbances supports disaster response, resource and ecosystem management, and climate adaptation efforts. 
Synthetic aperture radar (SAR) is an invaluable tool for disturbance mapping, providing consistent time-series images of the ground regardless of weather or illumination conditions. Despite SAR's potential for disturbance mapping, processing SAR data to an analysis-ready format requires expertise and significant compute resources, particularly for large-scale global analysis. In October 2023, NASA's Observational Products for End-Users from Remote Sensing Analysis (OPERA) project released the near-global Radiometric Terrain Corrected SAR backscatter from Sentinel-1 (RTC-S1) dataset, providing publicly available, analysis-ready SAR imagery. In this work, we utilize this new dataset to systematically analyze land surface disturbances. As labeling SAR data is often prohibitively time-consuming, we train a self-supervised vision transformer - which requires no labels to train - on OPERA RTC-S1 data to estimate a per-pixel distribution from the set of baseline imagery and assess disturbances when there is significant deviation from the modeled distribution. To test our model's capability and generality, we evaluate three different natural disasters - which represent high-intensity, abrupt disturbances - from three different regions of the world. Across events, our approach yields high-quality delineations: F1 scores exceeding 0.6 and Areas Under the Precision-Recall Curve exceeding 0.65, consistently outperforming existing SAR disturbance methods. Our findings suggest that a self-supervised vision transformer is well-suited for global disturbance mapping and can be a valuable tool for operational, near-global disturbance monitoring, particularly when labeled data does not exist. </p> </div> </dd> <dt> <a name='item277'>[277]</a> <a href ="/abs/2501.17178" title="Abstract" id="2501.17178"> arXiv:2501.17178 </a> (replaced) [<a href="/pdf/2501.17178" title="Download PDF" id="pdf-2501.17178" aria-labelledby="pdf-2501.17178">pdf</a>, <a href="https://arxiv.org/html/2501.17178v3" title="View HTML" id="html-2501.17178" aria-labelledby="html-2501.17178" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2501.17178" title="Other formats" id="oth-2501.17178" aria-labelledby="oth-2501.17178">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Tuning LLM Judge Design Decisions for 1/1000 of the Cost </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Salinas,+D">David Salinas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Swelam,+O">Omar Swelam</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hutter,+F">Frank Hutter</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Evaluating Large Language Models (LLMs) often requires costly human annotations. To address this, LLM-based judges have been proposed, which compare the outputs of two LLMs, enabling the ranking of models without human intervention. While several approaches have been proposed, many confounding factors are present between different papers. For instance, the model, the prompt, and other hyperparameters are typically changed at the same time, making apples-to-apples comparisons challenging. In this paper, we propose to systematically analyze and tune the hyperparameters of LLM judges. 
To alleviate the high cost of evaluating a judge, we propose to leverage multi-objective, multi-fidelity optimization, which finds judges that trade off accuracy against cost and also significantly reduces the cost of the search. Our method identifies judges that not only outperform existing benchmarks in accuracy and cost-efficiency but also utilize open-weight models, ensuring greater accessibility and reproducibility. </p> </div> </dd> <dt> <a name='item278'>[278]</a> <a href ="/abs/2502.06777" title="Abstract" id="2502.06777"> arXiv:2502.06777 </a> (replaced) [<a href="/pdf/2502.06777" title="Download PDF" id="pdf-2502.06777" aria-labelledby="pdf-2502.06777">pdf</a>, <a href="https://arxiv.org/html/2502.06777v2" title="View HTML" id="html-2502.06777" aria-labelledby="html-2502.06777" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.06777" title="Other formats" id="oth-2502.06777" aria-labelledby="oth-2502.06777">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning an Optimal Assortment Policy under Observational Data </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Han,+Y">Yuxuan Han</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Zhong,+H">Han Zhong</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Lu,+M">Miao Lu</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Blanchet,+J">Jose Blanchet</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Zhou,+Z">Zhengyuan Zhou</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG); Optimization and Control (math.OC); Statistics Theory (math.ST) </div> <p class='mathjax'> We study the fundamental problem of offline assortment optimization under the Multinomial Logit (MNL) model, where sellers must determine the optimal subset of the products to offer based solely on historical customer choice data. While most existing approaches to learning-based assortment optimization focus on the online learning of the optimal assortment through repeated interactions with customers, such exploration can be costly or even impractical in many real-world settings. In this paper, we consider the offline learning paradigm and investigate the minimal data requirements for efficient offline assortment optimization. To this end, we introduce Pessimistic Rank-Breaking (PRB), an algorithm that combines rank-breaking with pessimistic estimation. We prove that PRB is nearly minimax optimal by establishing the tight suboptimality upper bound and a nearly matching lower bound. This further shows that "optimal item coverage" - where each item in the optimal assortment appears sufficiently often in the historical data - is both sufficient and necessary for efficient offline learning. This significantly relaxes the previous requirement of observing the complete optimal assortment in the data. Our results provide fundamental insights into the data requirements for offline assortment optimization under the MNL model. 
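The pessimism principle behind PRB can be caricatured in a few lines: score each item from historical choice data, penalize items that were observed rarely, and assemble the assortment from the resulting lower confidence bounds. Everything below (the scoring, the penalty form, the constants) is an assumption for illustration; the paper's rank-breaking estimator for the MNL model is more involved.

```python
# Loose sketch of pessimistic (lower-confidence-bound) item selection.
import numpy as np

def pessimistic_assortment(win_counts, obs_counts, k, alpha=1.0):
    """win_counts[i]: times item i was chosen; obs_counts[i]: times offered."""
    rate = win_counts / np.maximum(obs_counts, 1)
    penalty = alpha * np.sqrt(np.log(len(rate)) / np.maximum(obs_counts, 1))
    pessimistic = rate - penalty              # lower confidence bound
    return np.argsort(pessimistic)[-k:]       # top-k items under pessimism

items = pessimistic_assortment(
    win_counts=np.array([30, 5, 40, 2, 18]),
    obs_counts=np.array([100, 10, 90, 4, 60]),
    k=2,
)
```

Note how a rarely offered item with a high empirical choice rate is held back by its wide confidence interval, which is the point of pessimism under limited coverage.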
</p> </div> </dd> <dt> <a name='item279'>[279]</a> <a href ="/abs/2502.19770" title="Abstract" id="2502.19770"> arXiv:2502.19770 </a> (replaced) [<a href="/pdf/2502.19770" title="Download PDF" id="pdf-2502.19770" aria-labelledby="pdf-2502.19770">pdf</a>, <a href="https://arxiv.org/html/2502.19770v2" title="View HTML" id="html-2502.19770" aria-labelledby="html-2502.19770" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.19770" title="Other formats" id="oth-2502.19770" aria-labelledby="oth-2502.19770">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> TAPE: Tailored Posterior Difference for Auditing of Machine Unlearning </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+W">Weiqi Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tian,+Z">Zhiyi Tian</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+A">An Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+S">Shui Yu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Cryptography and Security (cs.CR)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> With the increasing prevalence of Web-based platforms handling vast amounts of user data, machine unlearning has emerged as a crucial mechanism to uphold users' right to be forgotten, enabling individuals to request the removal of their specified data from trained models. However, the auditing of machine unlearning processes remains significantly underexplored. Although some existing methods offer unlearning auditing by leveraging backdoors, these backdoor-based approaches are inefficient and impractical, as they necessitate involvement in the initial model training process to embed the backdoors. In this paper, we propose a TAilored Posterior diffErence (TAPE) method to provide unlearning auditing independently of original model training. We observe that the process of machine unlearning inherently introduces changes in the model, which contain information related to the erased data. TAPE leverages unlearning model differences to assess how much information has been removed through the unlearning operation. Firstly, TAPE mimics the unlearned posterior differences by quickly building unlearned shadow models based on first-order influence estimation. Secondly, we train a Reconstructor model to extract and evaluate the private information of the unlearned posterior differences to audit unlearning. Existing privacy reconstruction methods based on posterior differences are only feasible for model updates of a single sample. To make the reconstruction effective for multi-sample unlearning requests, we propose two strategies, unlearned data perturbation and unlearned influence-based division, to augment the posterior difference. Extensive experimental results indicate the significant superiority of TAPE over state-of-the-art unlearning verification methods, with at least a 4.5$\times$ efficiency speedup and support for auditing in broader unlearning scenarios. 
</p> </div> </dd> <dt> <a name='item280'>[280]</a> <a href ="/abs/2503.07378" title="Abstract" id="2503.07378"> arXiv:2503.07378 </a> (replaced) [<a href="/pdf/2503.07378" title="Download PDF" id="pdf-2503.07378" aria-labelledby="pdf-2503.07378">pdf</a>, <a href="https://arxiv.org/html/2503.07378v4" title="View HTML" id="html-2503.07378" aria-labelledby="html-2503.07378" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.07378" title="Other formats" id="oth-2503.07378" aria-labelledby="oth-2503.07378">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Materials Map Integrating Experimental and Computational Data through Graph-Based Machine Learning for Enhanced Materials Discovery </div> <div class='list-authors'><a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Hashimoto,+Y">Yusuke Hashimoto</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Jia,+X">Xue Jia</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Li,+H">Hao Li</a>, <a href="https://arxiv.org/search/cond-mat?searchtype=author&query=Tomai,+T">Takaaki Tomai</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Materials Science (cond-mat.mtrl-sci)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Materials informatics (MI), which emerges from the integration of materials science and data science, is expected to greatly streamline material discovery and development. The data used for MI are obtained from both computational and experimental studies, while their integration remains challenging. In our previous study, we reported the integration of these datasets by applying a machine learning model that captures trends hidden in the experimental datasets to compositional data stored in the computational database. In this study, we use the obtained data to construct materials maps, which visualize the relations among the structural features of materials, aiming to support studies by experimental researchers. The map is constructed using a MatDeepLearn (MDL) framework, which implements the graph-based representation of material structures, deep learning, and dimensional reduction for map construction. We evaluate the obtained materials maps through statistical analysis and find that MDL using the message passing neural network (MPNN) architecture enables efficient extraction of features that reflect the structural complexity of materials. Moreover, we find that this advantage does not necessarily translate into improved accuracy in the prediction of material properties. We attribute this unexpected outcome to the high learning performance inherent in MPNN, which can contribute to the structuring of data points within the materials map. 
</p> </div> </dd> <dt> <a name='item281'>[281]</a> <a href ="/abs/2503.09829" title="Abstract" id="2503.09829"> arXiv:2503.09829 </a> (replaced) [<a href="/pdf/2503.09829" title="Download PDF" id="pdf-2503.09829" aria-labelledby="pdf-2503.09829">pdf</a>, <a href="https://arxiv.org/html/2503.09829v2" title="View HTML" id="html-2503.09829" aria-labelledby="html-2503.09829" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.09829" title="Other formats" id="oth-2503.09829" aria-labelledby="oth-2503.09829">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> SE(3)-Equivariant Robot Learning and Control: A Tutorial Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Seo,+J">Joohwan Seo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yoo,+S">Soochul Yoo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chang,+J">Junwoo Chang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=An,+H">Hyunseok An</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ryu,+H">Hyunwoo Ryu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+S">Soomi Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kruthiventy,+A">Arvind Kruthiventy</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Choi,+J">Jongeun Choi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Horowitz,+R">Roberto Horowitz</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Submitted to International Journal of Control, Automation and Systems (IJCAS), under review </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Machine Learning (cs.LG); Systems and Control (eess.SY) </div> <p class='mathjax'> Recent advances in deep learning and Transformers have driven major breakthroughs in robotics by employing techniques such as imitation learning, reinforcement learning, and LLM-based multimodal perception and decision-making. However, conventional deep learning and Transformer models often struggle to process data with inherent symmetries and invariances, typically relying on large datasets or extensive data augmentation. Equivariant neural networks overcome these limitations by explicitly integrating symmetry and invariance into their architectures, leading to improved efficiency and generalization. This tutorial survey reviews a wide range of equivariant deep learning and control methods for robotics, from classic to state-of-the-art, with a focus on SE(3)-equivariant models that leverage the natural 3D rotational and translational symmetries in visual robotic manipulation and control design. Using unified mathematical notation, we begin by reviewing key concepts from group theory, along with matrix Lie groups and Lie algebras. We then introduce foundational group-equivariant neural network design and show how group-equivariance can be obtained through their structure. Next, we discuss the applications of SE(3)-equivariant neural networks in robotics in terms of imitation learning and reinforcement learning. The SE(3)-equivariant control design is also reviewed from the perspective of geometric control. Finally, we highlight the challenges and future directions of equivariant methods in developing more robust, sample-efficient, and multi-modal real-world robotic systems. 
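What SE(3)-equivariance asks of a point-cloud feature map f is exactly f(Rx + t) = R f(x) + t for every rotation R and translation t. A tiny numerical check with the (trivially equivariant) centroid feature:

```python
# Numerical check of SE(3)-equivariance for the centroid feature map.
import numpy as np

rng = np.random.default_rng(0)
points = rng.normal(size=(50, 3))

# Random rotation via QR decomposition, projected onto SO(3).
Q, _ = np.linalg.qr(rng.normal(size=(3, 3)))
R = Q * np.sign(np.linalg.det(Q))            # ensure det = +1
t = rng.normal(size=3)

f = lambda x: x.mean(axis=0)                 # an equivariant feature
lhs = f(points @ R.T + t)                    # transform, then featurize
rhs = R @ f(points) + t                      # featurize, then transform
assert np.allclose(lhs, rhs)
```

Equivariant network layers enforce this identity by construction, which is why they generalize without rotation/translation augmentation.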
</p> </div> </dd> <dt> <a name='item282'>[282]</a> <a href ="/abs/2503.10118" title="Abstract" id="2503.10118"> arXiv:2503.10118 </a> (replaced) [<a href="/pdf/2503.10118" title="Download PDF" id="pdf-2503.10118" aria-labelledby="pdf-2503.10118">pdf</a>, <a href="https://arxiv.org/html/2503.10118v2" title="View HTML" id="html-2503.10118" aria-labelledby="html-2503.10118" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.10118" title="Other formats" id="oth-2503.10118" aria-labelledby="oth-2503.10118">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Real-Sim-Real (RSR) Loop Framework for Generalizable Robotic Policy Transfer with Differentiable Simulation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+L">Lu Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+Y">Yuxuan Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+S">Shiyu Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+J">Jinhao Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+W">Wenhao Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jia,+Y">Yufei Jia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yan,+Z">Zike Yan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gu,+W">Weibin Gu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+G">Guyue Zhou</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Robotics (cs.RO)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> The sim-to-real gap remains a critical challenge in robotics, hindering the deployment of algorithms trained in simulation to real-world systems. This paper introduces a novel Real-Sim-Real (RSR) loop framework leveraging differentiable simulation to address this gap by iteratively refining simulation parameters, aligning them with real-world conditions, and enabling robust and efficient policy transfer. A key contribution of our work is the design of an informative cost function that encourages the collection of diverse and representative real-world data, minimizing bias and maximizing the utility of each data point for simulation refinement. This cost function integrates seamlessly into existing reinforcement learning algorithms (e.g., PPO, SAC) and ensures a balanced exploration of critical regions in the real domain. Furthermore, our approach is implemented on the versatile Mujoco MJX platform, and our framework is compatible with a wide range of robotic systems. Experimental results on several robotic manipulation tasks demonstrate that our method significantly reduces the sim-to-real gap, achieving high task performance and generalizability across diverse scenarios of both explicit and implicit environmental uncertainties. 
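The system-identification half of a Real-Sim-Real loop is easy to miniaturize: a differentiable simulator lets a trajectory-discrepancy loss backpropagate directly into physical parameters. The toy 1-D damped dynamics below stand in for the paper's MuJoCo MJX setup; all details are assumptions.

```python
# Toy differentiable-simulation parameter refinement: fit a friction
# coefficient so simulated trajectories match "real" ones.
import torch

def simulate(x0, v0, friction, steps=50, dt=0.05):
    xs, x, v = [], x0, v0
    for _ in range(steps):
        v = v - friction * v * dt           # damped motion, differentiable
        x = x + v * dt
        xs.append(x)
    return torch.stack(xs)

true_traj = simulate(torch.tensor(0.0), torch.tensor(1.0), friction=torch.tensor(0.8))

friction = torch.tensor(0.1, requires_grad=True)   # initial sim guess
opt = torch.optim.Adam([friction], lr=0.05)
for _ in range(200):
    loss = ((simulate(torch.tensor(0.0), torch.tensor(1.0), friction) - true_traj) ** 2).mean()
    opt.zero_grad(); loss.backward(); opt.step()
print(float(friction))                      # converges toward 0.8
```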
</p> </div> </dd> <dt> <a name='item283'>[283]</a> <a href ="/abs/2503.10460" title="Abstract" id="2503.10460"> arXiv:2503.10460 </a> (replaced) [<a href="/pdf/2503.10460" title="Download PDF" id="pdf-2503.10460" aria-labelledby="pdf-2503.10460">pdf</a>, <a href="https://arxiv.org/html/2503.10460v2" title="View HTML" id="html-2503.10460" aria-labelledby="html-2503.10460" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.10460" title="Other formats" id="oth-2503.10460" aria-labelledby="oth-2503.10460">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Light-R1: Curriculum SFT, DPO and RL for Long COT from Scratch and Beyond </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+L">Liang Wen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cai,+Y">Yunke Cai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xiao,+F">Fenrui Xiao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=He,+X">Xin He</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=An,+Q">Qi An</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Duan,+Z">Zhenyu Duan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Du,+Y">Yimin Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+J">Junchen Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tang,+L">Lifu Tang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lv,+X">Xiaowei Lv</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zou,+H">Haosheng Zou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+Y">Yongchao Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jia,+S">Shousheng Jia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+X">Xiangzheng Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> v2: better writing & format for later submission; all release at <a href="https://github.com/Qihoo360/Light-R1" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> This paper introduces Light-R1, an open-source suite for training long reasoning models using reproducible and cost-effective methodology. Given the proprietary nature of data used in the DeepSeek-R1 series, we develop an alternative approach leveraging exclusively public data and models. Our curriculum training progressively increases data difficulty, combined with multi-staged post-training. Our Light-R1-32B model, trained from Qwen2.5-32B-Instruct, outperforms DeepSeek-R1-Distill-Qwen-32B in math reasoning. <br>Experimental results show that this curriculum approach becomes more effective when distinct, diverse datasets are available for different training stages: fine-tuning DeepSeek-R1-Distilled models (pre-tuned by DeepSeek team on proprietary data) with 3,000 challenging examples from our curriculum dataset yielded state-of-the-art 7B and 14B models, while the 32B model, Light-R1-32B-DS performed comparably to QwQ-32B and DeepSeek-R1. <br>Furthermore, we extend our work by applying GRPO on long reasoning models. 
Our final Light-R1-14B-DS achieves SOTA performance among 14B models in math, with AIME24 & 25 scores of 74.0 and 60.2, respectively, surpassing many 32B models and DeepSeek-R1-Distill-Llama-70B. Despite math-focused training, Light-R1-14B-DS demonstrates strong cross-domain generalization. <br>Light-R1 represents a significant advancement in making sophisticated reasoning models more accessible and implementable in real-world applications. Our models, training data and code have been made available at <a href="https://github.com/Qihoo360/Light-R1" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. </p> </div> </dd> <dt> <a name='item284'>[284]</a> <a href ="/abs/2503.10489" title="Abstract" id="2503.10489"> arXiv:2503.10489 </a> (replaced) [<a href="/pdf/2503.10489" title="Download PDF" id="pdf-2503.10489" aria-labelledby="pdf-2503.10489">pdf</a>, <a href="https://arxiv.org/html/2503.10489v2" title="View HTML" id="html-2503.10489" aria-labelledby="html-2503.10489" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.10489" title="Other formats" id="oth-2503.10489" aria-labelledby="oth-2503.10489">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Beyond Atoms: Enhancing Molecular Pretrained Representations with 3D Space Modeling </div> <div class='list-authors'><a href="https://arxiv.org/search/q-bio?searchtype=author&query=Lu,+S">Shuqi Lu</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Ji,+X">Xiaohong Ji</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zhang,+B">Bohang Zhang</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Yao,+L">Lin Yao</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Liu,+S">Siyuan Liu</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Gao,+Z">Zhifeng Gao</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Zhang,+L">Linfeng Zhang</a>, <a href="https://arxiv.org/search/q-bio?searchtype=author&query=Ke,+G">Guolin Ke</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Biomolecules (q-bio.BM)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Molecular pretrained representations (MPR) have emerged as a powerful approach for addressing the challenge of limited supervised data in applications such as drug discovery and material design. While early MPR methods relied on 1D sequences and 2D graphs, recent advancements have incorporated 3D conformational information to capture rich atomic interactions. However, these prior models treat molecules merely as discrete atom sets, overlooking the space surrounding them. We argue from a physical perspective that only modeling these discrete points is insufficient. We first present a simple yet insightful observation: naively adding randomly sampled virtual points beyond atoms can surprisingly enhance MPR performance. In light of this, we propose a principled framework that incorporates the entire 3D space spanned by molecules. We implement the framework via a novel Transformer-based architecture, dubbed SpaceFormer, with three key components: (1) grid-based space discretization; (2) grid sampling/merging; and (3) efficient 3D positional encoding. 
Extensive experiments show that SpaceFormer significantly outperforms previous 3D MPR models across various downstream tasks with limited data, validating the benefit of leveraging the additional 3D space beyond atoms in MPR models. </p> </div> </dd> <dt> <a name='item285'>[285]</a> <a href ="/abs/2503.10905" title="Abstract" id="2503.10905"> arXiv:2503.10905 </a> (replaced) [<a href="/pdf/2503.10905" title="Download PDF" id="pdf-2503.10905" aria-labelledby="pdf-2503.10905">pdf</a>, <a href="https://arxiv.org/html/2503.10905v2" title="View HTML" id="html-2503.10905" aria-labelledby="html-2503.10905" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.10905" title="Other formats" id="oth-2503.10905" aria-labelledby="oth-2503.10905">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Learning to Inference Adaptively for Multimodal Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+Z">Zhuoyan Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Nguyen,+K+D">Khoi Duc Nguyen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mukherjee,+P">Preeti Mukherjee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bagchi,+S">Saurabh Bagchi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chaterji,+S">Somali Chaterji</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+Y">Yingyu Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yin Li</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG) </div> <p class='mathjax'> Multimodal Large Language Models (MLLMs) have shown impressive capabilities in reasoning, yet come with substantial computational cost, limiting their deployment in resource-constrained settings. Despite recent efforts on improving the efficiency of MLLMs, prior solutions fall short in responding to varying runtime conditions, in particular changing resource availability (e.g., contention due to the execution of other programs on the device). To bridge this gap, we introduce AdaLLaVA, an adaptive inference framework that learns to dynamically reconfigure operations in an MLLM during inference, accounting for the input data and a latency budget. We conduct extensive experiments across benchmarks involving question-answering, reasoning, and hallucination. Our results show that AdaLLaVA effectively adheres to input latency budget, achieving varying accuracy and latency tradeoffs at runtime. Further, we demonstrate that AdaLLaVA adapts to both input latency and content, can be integrated with token selection for enhanced efficiency, and generalizes across MLLMs. Our project webpage with code release is at <a href="https://zhuoyan-xu.github.io/ada-llava/" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
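The SpaceFormer entry above rests on the observation that appending randomly sampled virtual points beyond the atoms can already help molecular pretrained representations. A minimal version of that preprocessing step might look as follows (the box padding, point count, and atom/virtual mask are assumptions, not the paper's exact scheme):

```python
# Append uniformly sampled virtual points inside the molecule's padded
# bounding box, keeping a mask that distinguishes atoms from virtual points.
import numpy as np

def add_virtual_points(atom_coords, n_virtual=32, margin=2.0, seed=0):
    rng = np.random.default_rng(seed)
    lo = atom_coords.min(axis=0) - margin
    hi = atom_coords.max(axis=0) + margin
    virtual = rng.uniform(lo, hi, size=(n_virtual, 3))
    is_atom = np.r_[np.ones(len(atom_coords)), np.zeros(n_virtual)]
    return np.vstack([atom_coords, virtual]), is_atom

coords, mask = add_virtual_points(np.random.default_rng(1).normal(size=(12, 3)))
```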
</p> </div> </dd> <dt> <a name='item286'>[286]</a> <a href ="/abs/2503.11209" title="Abstract" id="2503.11209"> arXiv:2503.11209 </a> (replaced) [<a href="/pdf/2503.11209" title="Download PDF" id="pdf-2503.11209" aria-labelledby="pdf-2503.11209">pdf</a>, <a href="/format/2503.11209" title="Other formats" id="oth-2503.11209" aria-labelledby="oth-2503.11209">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Clustering Items through Bandit Feedback: Finding the Right Feature out of Many </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Graf,+M">Maximilian Graf</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Thuot,+V">Victor Thuot</a> (MISTEA), <a href="https://arxiv.org/search/stat?searchtype=author&query=Verzelen,+N">Nicolas Verzelen</a> (MISTEA)</div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> We study the problem of clustering a set of items based on bandit feedback. Each of the $n$ items is characterized by a feature vector, with a possibly large dimension $d$. The items are partitioned into two unknown groups such that items within the same group share the same feature vector. We consider a sequential and adaptive setting in which, at each round, the learner selects one item and one feature, then observes a noisy evaluation of the item's feature. The learner's objective is to recover the correct partition of the items, while keeping the number of observations as small as possible. We provide an algorithm which relies on finding a relevant feature for the clustering task, leveraging the Sequential Halving algorithm. With probability at least $1-\delta$, we obtain an accurate recovery of the partition and derive an upper bound on the budget required. Furthermore, we derive an instance-dependent lower bound, which is tight in some relevant cases. 
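Sequential Halving, the fixed-budget subroutine the clustering algorithm above leans on, splits the sampling budget across elimination rounds, samples the surviving arms uniformly, and drops the worse half each round. A standard sketch:

```python
# Sequential Halving for fixed-budget best-arm identification.
import math
import numpy as np

def sequential_halving(pull, n_arms, budget, seed=0):
    arms = list(range(n_arms))
    rounds = math.ceil(math.log2(n_arms))
    for _ in range(rounds):
        per_arm = max(1, budget // (len(arms) * rounds))
        means = [np.mean([pull(a) for _ in range(per_arm)]) for a in arms]
        order = np.argsort(means)[::-1]                 # best first
        arms = [arms[i] for i in order[: max(1, len(arms) // 2)]]
    return arms[0]

# Toy bandit: arm i has mean i / 10 with Gaussian noise.
best = sequential_halving(lambda a: a / 10 + np.random.randn(), n_arms=8, budget=800)
```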
</p> </div> </dd> <dt> <a name='item287'>[287]</a> <a href ="/abs/2503.12358" title="Abstract" id="2503.12358"> arXiv:2503.12358 </a> (replaced) [<a href="/pdf/2503.12358" title="Download PDF" id="pdf-2503.12358" aria-labelledby="pdf-2503.12358">pdf</a>, <a href="https://arxiv.org/html/2503.12358v2" title="View HTML" id="html-2503.12358" aria-labelledby="html-2503.12358" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.12358" title="Other formats" id="oth-2503.12358" aria-labelledby="oth-2503.12358">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> IPCGRL: Language-Instructed Reinforcement Learning for Procedural Level Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Baek,+I">In-Chang Baek</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+S">Sung-Hyun Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lee,+S">Seo-Young Lee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+D">Dong-Hyeun Kim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Kim,+K">Kyung-Joong Kim</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 9 pages, 9 figures, 3 tables </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Computation and Language (cs.CL); Machine Learning (cs.LG) </div> <p class='mathjax'> Recent research has highlighted the significance of natural language in enhancing the controllability of generative models. While various efforts have been made to leverage natural language for content generation, research on deep reinforcement learning (DRL) agents utilizing text-based instructions for procedural content generation remains limited. In this paper, we propose IPCGRL, an instruction-based procedural content generation method via reinforcement learning, which incorporates a sentence embedding model. IPCGRL fine-tunes task-specific embedding representations to effectively compress game-level conditions. We evaluate IPCGRL in a two-dimensional level generation task and compare its performance with a general-purpose embedding method. The results indicate that IPCGRL achieves up to a 21.4% improvement in controllability and a 17.2% improvement in generalizability for unseen instructions. Furthermore, the proposed method extends the modality of conditional input, enabling a more flexible and expressive interaction framework for procedural content generation. 
</p> </div> </dd> <dt> <a name='item288'>[288]</a> <a href ="/abs/2503.12808" title="Abstract" id="2503.12808"> arXiv:2503.12808 </a> (replaced) [<a href="/pdf/2503.12808" title="Download PDF" id="pdf-2503.12808" aria-labelledby="pdf-2503.12808">pdf</a>, <a href="https://arxiv.org/html/2503.12808v2" title="View HTML" id="html-2503.12808" aria-labelledby="html-2503.12808" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.12808" title="Other formats" id="oth-2503.12808" aria-labelledby="oth-2503.12808">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Estimating stationary mass, frequency by frequency </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Nakul,+M">Milind Nakul</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Muthukumar,+V">Vidya Muthukumar</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Pananjady,+A">Ashwin Pananjady</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Information Theory (cs.IT); Machine Learning (cs.LG); Probability (math.PR); Statistics Theory (math.ST) </div> <p class='mathjax'> Suppose we observe a trajectory of length $n$ from an $\alpha$-mixing stochastic process over a finite but potentially large state space. We consider the problem of estimating the probability mass placed by the stationary distribution of any such process on elements that occur with a certain frequency in the observed sequence. We estimate this vector of probabilities in total variation distance, showing universal consistency in $n$ and recovering known results for i.i.d. sequences as special cases. Our proposed methodology carefully combines the plug-in (or empirical) estimator with a recently proposed modification of the Good–Turing estimator called WingIt, which was originally developed for Markovian sequences. En route to controlling the error of our estimator, we develop new performance bounds on WingIt and the plug-in estimator for $\alpha$-mixing stochastic processes. Importantly, the extensively used method of Poissonization can no longer be applied in our non-i.i.d. setting, and so we develop complementary tools, including concentration inequalities for a natural self-normalized statistic of mixing sequences, that may prove independently useful in the design and analysis of estimators for related problems. 
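</p> <p> For orientation, the two classical ingredients named in the abstract, the plug-in estimator and the Good–Turing correction, fit in a few lines; WingIt's windowed modification for mixing sequences is more involved and is not reproduced here: </p> <pre><code>
from collections import Counter

def frequency_mass_estimates(seq, r):
    """Estimate the stationary mass on symbols appearing exactly r times,
    two classical ways (N_r = number of distinct symbols seen r times):
      plug-in:     r * N_r / n
      Good-Turing: (r + 1) * N_{r+1} / n   (r = 0 recovers the classic
                                            missing-mass estimate N_1 / n)
    """
    n = len(seq)
    freq_of_freq = Counter(Counter(seq).values())
    plug_in = r * freq_of_freq.get(r, 0) / n
    good_turing = (r + 1) * freq_of_freq.get(r + 1, 0) / n
    return plug_in, good_turing

print(frequency_mass_estimates("abracadabra", 1))  # mass on symbols seen once
</code></pre> <p class='mathjax'>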
</p> </div> </dd> <dt> <a name='item289'>[289]</a> <a href ="/abs/2503.12813" title="Abstract" id="2503.12813"> arXiv:2503.12813 </a> (replaced) [<a href="/pdf/2503.12813" title="Download PDF" id="pdf-2503.12813" aria-labelledby="pdf-2503.12813">pdf</a>, <a href="https://arxiv.org/html/2503.12813v2" title="View HTML" id="html-2503.12813" aria-labelledby="html-2503.12813" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.12813" title="Other formats" id="oth-2503.12813" aria-labelledby="oth-2503.12813">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Epidemic Forecasting with a Hybrid Deep Learning Method Using CNN-LSTM With WOA-GWO Parameter Optimization: Global COVID-19 Case Study </div> <div class='list-authors'><a href="https://arxiv.org/search/eess?searchtype=author&query=Alizadeh,+M">Mousa Alizadeh</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Samaei,+M+H">Mohammad Hossein Samaei</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Seilsepour,+A">Azam Seilsepour</a>, <a href="https://arxiv.org/search/eess?searchtype=author&query=Beheshti,+M+T">Mohammad TH Beheshti</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Image and Video Processing (eess.IV)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Effective epidemic modeling is essential for managing public health crises, requiring robust methods to predict disease spread and optimize resource allocation. This study introduces a novel deep learning framework that advances time series forecasting for infectious diseases, with its application to COVID-19 data as a critical case study. Our hybrid approach integrates Convolutional Neural Networks (CNNs) and Long Short-Term Memory (LSTM) models to capture spatial and temporal dynamics of disease transmission across diverse regions. The CNN extracts spatial features from raw epidemiological data, while the LSTM models temporal patterns, yielding precise and adaptable predictions. To maximize performance, we employ a hybrid optimization strategy combining the Whale Optimization Algorithm (WOA) and Gray Wolf Optimization (GWO) to fine-tune hyperparameters such as learning rates, batch sizes, and training epochs, enhancing model efficiency and accuracy. Applied to COVID-19 case data from 24 countries across six continents, our method outperforms established benchmarks, including ARIMA and standalone LSTM models, with statistically significant gains in predictive accuracy (e.g., reduced RMSE). This framework demonstrates its potential as a versatile method for forecasting epidemic trends, offering insights for resource planning and decision making in both historical contexts, like the COVID-19 pandemic, and future outbreaks. 
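</p> <p> A minimal PyTorch sketch of the CNN-LSTM backbone described above, with placeholder widths and kernel size standing in for the hyperparameters a WOA/GWO search would tune (the metaheuristic loop itself is omitted): </p> <pre><code>
import torch
import torch.nn as nn

class CNNLSTMForecaster(nn.Module):
    """Illustrative CNN-LSTM forecaster: Conv1d extracts local patterns
    from a window of case counts, an LSTM models temporal dependence,
    and a linear head predicts the next value."""
    def __init__(self, n_features=1, conv_channels=32, hidden=64):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv1d(n_features, conv_channels, kernel_size=3, padding=1),
            nn.ReLU(),
        )
        self.lstm = nn.LSTM(conv_channels, hidden, batch_first=True)
        self.head = nn.Linear(hidden, 1)

    def forward(self, x):  # x: (batch, window, n_features)
        z = self.conv(x.transpose(1, 2)).transpose(1, 2)  # back to (B, T, C)
        out, _ = self.lstm(z)
        return self.head(out[:, -1])  # forecast from the last time step

model = CNNLSTMForecaster()
print(model(torch.randn(8, 14, 1)).shape)  # torch.Size([8, 1])
</code></pre> <p class='mathjax'>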
</p> </div> </dd> <dt> <a name='item290'>[290]</a> <a href ="/abs/2503.13050" title="Abstract" id="2503.13050"> arXiv:2503.13050 </a> (replaced) [<a href="/pdf/2503.13050" title="Download PDF" id="pdf-2503.13050" aria-labelledby="pdf-2503.13050">pdf</a>, <a href="https://arxiv.org/html/2503.13050v2" title="View HTML" id="html-2503.13050" aria-labelledby="html-2503.13050" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13050" title="Other formats" id="oth-2503.13050" aria-labelledby="oth-2503.13050">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> E-Values Expand the Scope of Conformal Prediction </div> <div class='list-authors'><a href="https://arxiv.org/search/stat?searchtype=author&query=Gauthier,+E">Etienne Gauthier</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Bach,+F">Francis Bach</a>, <a href="https://arxiv.org/search/stat?searchtype=author&query=Jordan,+M+I">Michael I. Jordan</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Code available at: <a href="https://github.com/GauthierE/evalues-expand-cp" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (stat.ML)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Conformal prediction is a powerful framework for distribution-free uncertainty quantification. The standard approach to conformal prediction relies on comparing the ranks of prediction scores: under exchangeability, the rank of a future test point cannot be too extreme relative to a calibration set. This rank-based method can be reformulated in terms of p-values. In this paper, we explore an alternative approach based on e-values, known as conformal e-prediction. E-values offer key advantages that cannot be achieved with p-values, enabling new theoretical and practical capabilities. In particular, we present three applications that leverage the unique strengths of e-values: batch anytime-valid conformal prediction, fixed-size conformal sets with data-dependent coverage, and conformal prediction under ambiguous ground truth. Overall, these examples demonstrate that e-value-based constructions provide a flexible expansion of the toolbox of conformal prediction. 
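</p> <p> For orientation only, one standard e-value construction for conformal prediction (the paper's three applications rest on more refined constructions than this sketch): </p> <pre><code>
import numpy as np

def conformal_e_set(cal_scores, test_scores_by_label, alpha=0.1):
    """Conformal e-prediction set from the exchangeability e-value
        E(y) = (n + 1) * s(x, y) / (sum of calibration scores + s(x, y)).
    For nonnegative scores E(y) has expectation 1 under exchangeability,
    so by Markov's inequality the labels whose e-value stays below 1/alpha
    form a set covering the truth with probability at least 1 - alpha."""
    n, total = len(cal_scores), float(np.sum(cal_scores))
    return [y for y, s in test_scores_by_label.items()
            if 1.0 / alpha > (n + 1) * s / (total + s)]

rng = np.random.default_rng(0)
cal = rng.exponential(size=100)               # calibration nonconformity scores
test = {"cat": 0.4, "dog": 2.5, "fox": 40.0}  # hypothetical per-label scores
print(conformal_e_set(cal, test, alpha=0.1))  # likely ['cat', 'dog']
</code></pre> <p class='mathjax'>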
</p> </div> </dd> <dt> <a name='item291'>[291]</a> <a href ="/abs/2503.13281" title="Abstract" id="2503.13281"> arXiv:2503.13281 </a> (replaced) [<a href="/pdf/2503.13281" title="Download PDF" id="pdf-2503.13281" aria-labelledby="pdf-2503.13281">pdf</a>, <a href="/format/2503.13281" title="Other formats" id="oth-2503.13281" aria-labelledby="oth-2503.13281">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> LLM-Match: An Open-Sourced Patient Matching Model Based on Large Language Models and Retrieval-Augmented Generation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+X">Xiaodi Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chowdhury,+S">Shaika Chowdhury</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wi,+C+I">Chung Il Wi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Vassilaki,+M">Maria Vassilaki</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+K">Ken Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sio,+T+T">Terence T Sio</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Garrick,+O">Owen Garrick</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Juhn,+Y+J">Young J Juhn</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cerhan,+J+R">James R Cerhan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tao,+C">Cui Tao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zong,+N">Nansu Zong</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 10 pages, 1 figure </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Computation and Language (cs.CL)</span>; Artificial Intelligence (cs.AI); Machine Learning (cs.LG) </div> <p class='mathjax'> Patient matching is the process of linking patients to appropriate clinical trials by accurately identifying and matching their medical records with trial eligibility criteria. We propose LLM-Match, a novel framework for patient matching leveraging fine-tuned open-source large language models. Our approach consists of four key components. First, a retrieval-augmented generation (RAG) module extracts relevant patient context from a vast pool of electronic health records (EHRs). Second, a prompt generation module constructs input prompts by integrating trial eligibility criteria (both inclusion and exclusion criteria), patient context, and system instructions. Third, a fine-tuning module with a classification head optimizes the model parameters using structured prompts and ground-truth labels. Fourth, an evaluation module assesses the fine-tuned model's performance on the testing datasets. We evaluated LLM-Match on four open datasets - n2c2, SIGIR, TREC 2021, and TREC 2022 - using open-source models, comparing it against TrialGPT, Zero-Shot, and GPT-4-based closed models. LLM-Match outperformed all baselines. 
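</p> <p> A toy version of the prompt generation step, with hypothetical field names and formatting; the retrieval, fine-tuning, and classification-head components of the framework are omitted: </p> <pre><code>
def build_matching_prompt(system_instructions, inclusion, exclusion, patient_chunks):
    """Assemble a patient-matching prompt in the spirit of the prompt
    generation module: system instructions, trial criteria, then patient
    context retrieved by the RAG step. Layout is illustrative only."""
    criteria = "\n".join(
        [f"INCLUSION: {c}" for c in inclusion]
        + [f"EXCLUSION: {c}" for c in exclusion]
    )
    context = "\n".join(f"[EHR {i}] {c}" for i, c in enumerate(patient_chunks, 1))
    return (
        f"{system_instructions}\n\n## Trial criteria\n{criteria}\n\n"
        f"## Patient context\n{context}\n\n"
        "Answer: does the patient meet ALL inclusion and NO exclusion criteria?"
    )

prompt = build_matching_prompt(
    "You are a clinical trial matching assistant.",
    inclusion=["age >= 18", "diagnosed with type 2 diabetes"],
    exclusion=["pregnant"],
    patient_chunks=["54-year-old male", "T2DM since 2015, on metformin"],
)
print(prompt)
</code></pre> <p class='mathjax'>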
</p> </div> </dd> </dl> <div class='paging'>Total of 291 entries </div> </div> </div> </div> </main> </div> </body> </html>