Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/>  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b">  <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\$","\$"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a>  <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div>  <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div>  <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–50 of 4,047 results for author: <span class="mathjax">Zhang, L</span> </h1> </div> <div class="level-right is-hidden-mobile">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Zhang%2C+L">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Zhang, L"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Zhang%2C+L&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Zhang, L"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Zhang%2C+L&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Zhang%2C+L&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+L&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+L&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+L&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+L&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10328">arXiv:2502.10328</a> <span> [<a href="https://arxiv.org/pdf/2502.10328">pdf</a>, <a href="https://arxiv.org/format/2502.10328">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Generalised Parallel Tempering: Flexible Replica Exchange via Flows and Diffusions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Leo Zhang</a>, <a href="/search/cs?searchtype=author&query=Potaptchik%2C+P">Peter Potaptchik</a>, <a href="/search/cs?searchtype=author&query=Doucet%2C+A">Arnaud Doucet</a>, <a href="/search/cs?searchtype=author&query=Dau%2C+H">Hai-Dang Dau</a>, <a href="/search/cs?searchtype=author&query=Syed%2C+S">Saifuddin Syed</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10328v1-abstract-short" style="display: inline;"> Parallel Tempering (PT) is a classical MCMC algorithm designed for leveraging parallel computation to sample efficiently from high-dimensional, multimodal or otherwise complex distributions via annealing. One limitation of the standard formulation of PT is the growth of computational resources required to generate high-quality samples, as measured by effective sample size or round trip rate, for i… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10328v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10328v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10328v1-abstract-full" style="display: none;"> Parallel Tempering (PT) is a classical MCMC algorithm designed for leveraging parallel computation to sample efficiently from high-dimensional, multimodal or otherwise complex distributions via annealing. One limitation of the standard formulation of PT is the growth of computational resources required to generate high-quality samples, as measured by effective sample size or round trip rate, for increasingly challenging distributions. To address this issue, we propose the framework: Generalised Parallel Tempering (GePT) which allows for the incorporation of recent advances in modern generative modelling, such as normalising flows and diffusion models, within Parallel Tempering, while maintaining the same theoretical guarantees as MCMC-based methods. For instance, we show that this allows us to utilise diffusion models in a parallelised manner, bypassing the usual computational cost of a large number of steps to generate quality samples. Further, we empirically demonstrate that GePT can improve sample quality and reduce the growth of computational resources required to handle complex distributions over the classical algorithm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10328v1-abstract-full').style.display = 'none'; document.getElementById('2502.10328v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.10157">arXiv:2502.10157</a> <span> [<a href="https://arxiv.org/pdf/2502.10157">pdf</a>, <a href="https://arxiv.org/format/2502.10157">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> SessionRec: Next Session Prediction Paradigm For Generative Sequential Recommendation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+L">Lei Huang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+H">Hao Guo</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+L">Linzhi Peng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Long Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaoteng Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Daoyuan Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shichao Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jinpeng Wang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+L">Lei Wang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Sheng Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.10157v1-abstract-short" style="display: inline;"> We introduce SessionRec, a novel next-session prediction paradigm (NSPP) for generative sequential recommendation, addressing the fundamental misalignment between conventional next-item prediction paradigm (NIPP) and real-world recommendation scenarios. Unlike NIPP's item-level autoregressive generation that contradicts actual session-based user interactions, our framework introduces a session-awa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10157v1-abstract-full').style.display = 'inline'; document.getElementById('2502.10157v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.10157v1-abstract-full" style="display: none;"> We introduce SessionRec, a novel next-session prediction paradigm (NSPP) for generative sequential recommendation, addressing the fundamental misalignment between conventional next-item prediction paradigm (NIPP) and real-world recommendation scenarios. Unlike NIPP's item-level autoregressive generation that contradicts actual session-based user interactions, our framework introduces a session-aware representation learning through hierarchical sequence aggregation (intra/inter-session), reducing attention computation complexity while enabling implicit modeling of massive negative interactions, and a session-based prediction objective that better captures users' diverse interests through multi-item recommendation in next sessions. Moreover, we found that incorporating a rank loss for items within the session under the next session prediction paradigm can significantly improve the ranking effectiveness of generative sequence recommendation models. We also verified that SessionRec exhibits clear power-law scaling laws similar to those observed in LLMs. Extensive experiments conducted on public datasets and online A/B test in Meituan App demonstrate the effectiveness of SessionRec. The proposed paradigm establishes new foundations for developing industrial-scale generative recommendation systems through its model-agnostic architecture and computational efficiency. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.10157v1-abstract-full').style.display = 'none'; document.getElementById('2502.10157v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09977">arXiv:2502.09977</a> <span> [<a href="https://arxiv.org/pdf/2502.09977">pdf</a>, <a href="https://arxiv.org/format/2502.09977">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> LaRA: Benchmarking Retrieval-Augmented Generation and Long-Context LLMs - No Silver Bullet for LC or RAG Routing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+K">Kuan Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Liwen Zhang</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+Y">Yong Jiang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+P">Pengjun Xie</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+F">Fei Huang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shuai Wang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+M">Minhao Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09977v1-abstract-short" style="display: inline;"> Effectively incorporating external knowledge into Large Language Models (LLMs) is crucial for enhancing their capabilities and addressing real-world needs. Retrieval-Augmented Generation (RAG) offers an effective method for achieving this by retrieving the most relevant fragments into LLMs. However, the advancements in context window size for LLMs offer an alternative approach, raising the questio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09977v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09977v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09977v1-abstract-full" style="display: none;"> Effectively incorporating external knowledge into Large Language Models (LLMs) is crucial for enhancing their capabilities and addressing real-world needs. Retrieval-Augmented Generation (RAG) offers an effective method for achieving this by retrieving the most relevant fragments into LLMs. However, the advancements in context window size for LLMs offer an alternative approach, raising the question of whether RAG remains necessary for effectively handling external knowledge. Several existing studies provide inconclusive comparisons between RAG and long-context (LC) LLMs, largely due to limitations in the benchmark designs. In this paper, we present LaRA, a novel benchmark specifically designed to rigorously compare RAG and LC LLMs. LaRA encompasses 2,326 test cases across four practical QA task categories and three types of naturally occurring long texts. Through systematic evaluation of seven open-source and four proprietary LLMs, we find that the optimal choice between RAG and LC depends on a complex interplay of factors, including the model's parameter size, long-text capabilities, context length, task type, and the characteristics of the retrieved chunks. Our findings provide actionable guidelines for practitioners to effectively leverage both RAG and LC approaches in developing and deploying LLM applications. Our code and dataset is provided at: \href{https://github.com/likuanppd/LaRA}{\textbf{https://github.com/likuanppd/LaRA}}. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09977v1-abstract-full').style.display = 'none'; document.getElementById('2502.09977v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09970">arXiv:2502.09970</a> <span> [<a href="https://arxiv.org/pdf/2502.09970">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Universal Machine Learning Interatomic Potentials are Ready for Solid Ion Conductors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Du%2C+H">Hongwei Du</a>, <a href="/search/cs?searchtype=author&query=Hui%2C+J">Jian Hui</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lanting Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Hong Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09970v1-abstract-short" style="display: inline;"> With the rapid development of energy storage technology, high-performance solid-state electrolytes (SSEs) have become critical for next-generation lithium-ion batteries. These materials require high ionic conductivity, excellent electrochemical stability, and good mechanical properties to meet the demands of electric vehicles and portable electronics. However, traditional methods like density func… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09970v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09970v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09970v1-abstract-full" style="display: none;"> With the rapid development of energy storage technology, high-performance solid-state electrolytes (SSEs) have become critical for next-generation lithium-ion batteries. These materials require high ionic conductivity, excellent electrochemical stability, and good mechanical properties to meet the demands of electric vehicles and portable electronics. However, traditional methods like density functional theory (DFT) and empirical force fields face challenges such as high computational costs, poor scalability, and limited accuracy across material systems. Universal machine learning interatomic potentials (uMLIPs) offer a promising solution with their efficiency and near-DFT-level accuracy.This study systematically evaluates six advanced uMLIP models (MatterSim, MACE, SevenNet, CHGNet, M3GNet, and ORBFF) in terms of energy, forces, thermodynamic properties, elastic moduli, and lithium-ion diffusion behavior. The results show that MatterSim outperforms others in nearly all metrics, particularly in complex material systems, demonstrating superior accuracy and physical consistency. Other models exhibit significant deviations due to issues like energy inconsistency or insufficient training data coverage.Further analysis reveals that MatterSim achieves excellent agreement with reference values in lithium-ion diffusivity calculations, especially at room temperature. Studies on Li3YCl6 and Li6PS5Cl uncover how crystal structure, anion disorder levels, and Na/Li arrangements influence ionic conductivity. Appropriate S/Cl disorder levels and optimized Na/Li arrangements enhance diffusion pathway connectivity, improving overall ionic transport performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09970v1-abstract-full').style.display = 'none'; document.getElementById('2502.09970v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09793">arXiv:2502.09793</a> <span> [<a href="https://arxiv.org/pdf/2502.09793">pdf</a>, <a href="https://arxiv.org/format/2502.09793">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Noise Controlled CT Super-Resolution with Conditional Diffusion Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Yuang Wang</a>, <a href="/search/cs?searchtype=author&query=Yoon%2C+S">Siyeop Yoon</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+R">Rui Hu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+B">Baihui Yu</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+D">Duhgoon Lee</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+R">Rajiv Gupta</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Li Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhiqiang Chen</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+D">Dufan Wu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09793v1-abstract-short" style="display: inline;"> Improving the spatial resolution of CT images is a meaningful yet challenging task, often accompanied by the issue of noise amplification. This article introduces an innovative framework for noise-controlled CT super-resolution utilizing the conditional diffusion model. The model is trained on hybrid datasets, combining noise-matched simulation data with segmented details from real data. Experimen… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09793v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09793v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09793v1-abstract-full" style="display: none;"> Improving the spatial resolution of CT images is a meaningful yet challenging task, often accompanied by the issue of noise amplification. This article introduces an innovative framework for noise-controlled CT super-resolution utilizing the conditional diffusion model. The model is trained on hybrid datasets, combining noise-matched simulation data with segmented details from real data. Experimental results with real CT images validate the effectiveness of our proposed framework, showing its potential for practical applications in CT imaging. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09793v1-abstract-full').style.display = 'none'; document.getElementById('2502.09793v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The 8th International Conference on Image Formation in X-Ray Computed Tomography, Bamberg, Germany, August 5 - 9, 2024</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09675">arXiv:2502.09675</a> <span> [<a href="https://arxiv.org/pdf/2502.09675">pdf</a>, <a href="https://arxiv.org/format/2502.09675">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Multi-level Conflict-Aware Network for Multi-modal Sentiment Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yubo Gao</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+H">Haotian Wu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09675v1-abstract-short" style="display: inline;"> Multimodal Sentiment Analysis (MSA) aims to recognize human emotions by exploiting textual, acoustic, and visual modalities, and thus how to make full use of the interactions between different modalities is a central challenge of MSA. Interaction contains alignment and conflict aspects. Current works mainly emphasize alignment and the inherent differences between unimodal modalities, neglecting th… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09675v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09675v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09675v1-abstract-full" style="display: none;"> Multimodal Sentiment Analysis (MSA) aims to recognize human emotions by exploiting textual, acoustic, and visual modalities, and thus how to make full use of the interactions between different modalities is a central challenge of MSA. Interaction contains alignment and conflict aspects. Current works mainly emphasize alignment and the inherent differences between unimodal modalities, neglecting the fact that there are also potential conflicts between bimodal combinations. Additionally, multi-task learning-based conflict modeling methods often rely on the unstable generated labels. To address these challenges, we propose a novel multi-level conflict-aware network (MCAN) for multimodal sentiment analysis, which progressively segregates alignment and conflict constituents from unimodal and bimodal representations, and further exploits the conflict constituents with the conflict modeling branch. In the conflict modeling branch, we conduct discrepancy constraints at both the representation and predicted output levels, avoiding dependence on the generated labels. Experimental results on the CMU-MOSI and CMU-MOSEI datasets demonstrate the effectiveness of the proposed MCAN. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09675v1-abstract-full').style.display = 'none'; document.getElementById('2502.09675v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09608">arXiv:2502.09608</a> <span> [<a href="https://arxiv.org/pdf/2502.09608">pdf</a>, <a href="https://arxiv.org/format/2502.09608">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> Instance Segmentation of Scene Sketches Using Natural Image Priors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+M">Mia Tang</a>, <a href="/search/cs?searchtype=author&query=Vinker%2C+Y">Yael Vinker</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+C">Chuan Yan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lvmin Zhang</a>, <a href="/search/cs?searchtype=author&query=Agrawala%2C+M">Maneesh Agrawala</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09608v1-abstract-short" style="display: inline;"> Sketch segmentation involves grouping pixels within a sketch that belong to the same object or instance. It serves as a valuable tool for sketch editing tasks, such as moving, scaling, or removing specific components. While image segmentation models have demonstrated remarkable capabilities in recent years, sketches present unique challenges for these models due to their sparse nature and wide var… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09608v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09608v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09608v1-abstract-full" style="display: none;"> Sketch segmentation involves grouping pixels within a sketch that belong to the same object or instance. It serves as a valuable tool for sketch editing tasks, such as moving, scaling, or removing specific components. While image segmentation models have demonstrated remarkable capabilities in recent years, sketches present unique challenges for these models due to their sparse nature and wide variation in styles. We introduce SketchSeg, a method for instance segmentation of raster scene sketches. Our approach adapts state-of-the-art image segmentation and object detection models to the sketch domain by employing class-agnostic fine-tuning and refining segmentation masks using depth cues. Furthermore, our method organizes sketches into sorted layers, where occluded instances are inpainted, enabling advanced sketch editing applications. As existing datasets in this domain lack variation in sketch styles, we construct a synthetic scene sketch segmentation dataset featuring sketches with diverse brush strokes and varying levels of detail. We use this dataset to demonstrate the robustness of our approach and will release it to promote further research in the field. Project webpage: https://sketchseg.github.io/sketch-seg/ <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09608v1-abstract-full').style.display = 'none'; document.getElementById('2502.09608v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09022">arXiv:2502.09022</a> <span> [<a href="https://arxiv.org/pdf/2502.09022">pdf</a>, <a href="https://arxiv.org/format/2502.09022">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Mechanistic Unveiling of Transformer Circuits: Self-Influence as a Key to Model Reasoning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lin Zhang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+L">Lijie Hu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Di Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09022v2-abstract-short" style="display: inline;"> Transformer-based language models have achieved significant success; however, their internal mechanisms remain largely opaque due to the complexity of non-linear interactions and high-dimensional operations. While previous studies have demonstrated that these models implicitly embed reasoning trees, humans typically employ various distinct logical reasoning mechanisms to complete the same task. It… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09022v2-abstract-full').style.display = 'inline'; document.getElementById('2502.09022v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09022v2-abstract-full" style="display: none;"> Transformer-based language models have achieved significant success; however, their internal mechanisms remain largely opaque due to the complexity of non-linear interactions and high-dimensional operations. While previous studies have demonstrated that these models implicitly embed reasoning trees, humans typically employ various distinct logical reasoning mechanisms to complete the same task. It is still unclear which multi-step reasoning mechanisms are used by language models to solve such tasks. In this paper, we aim to address this question by investigating the mechanistic interpretability of language models, particularly in the context of multi-step reasoning tasks. Specifically, we employ circuit analysis and self-influence functions to evaluate the changing importance of each token throughout the reasoning process, allowing us to map the reasoning paths adopted by the model. We apply this methodology to the GPT-2 model on a prediction task (IOI) and demonstrate that the underlying circuits reveal a human-interpretable reasoning process used by the model. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09022v2-abstract-full').style.display = 'none'; document.getElementById('2502.09022v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NAACL2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08705">arXiv:2502.08705</a> <span> [<a href="https://arxiv.org/pdf/2502.08705">pdf</a>, <a href="https://arxiv.org/format/2502.08705">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Digital Libraries">cs.DL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Physics Education">physics.ed-ph</span> </div> </div> <p class="title is-5 mathjax"> Beyond the Lens: Quantifying the Impact of Scientific Documentaries through Amazon Reviews </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Naiman%2C+J">Jill Naiman</a>, <a href="/search/cs?searchtype=author&query=Pessianzadeh%2C+A">Aria Pessianzadeh</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+H">Hanyu Zhao</a>, <a href="/search/cs?searchtype=author&query=Christensen%2C+A">AJ Christensen</a>, <a href="/search/cs?searchtype=author&query=Nunn%2C+A">Alistair Nunn</a>, <a href="/search/cs?searchtype=author&query=Srikanth%2C+S">Shriya Srikanth</a>, <a href="/search/cs?searchtype=author&query=Gami%2C+A">Anushka Gami</a>, <a href="/search/cs?searchtype=author&query=Maxwell%2C+E">Emma Maxwell</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Louisa Zhang</a>, <a href="/search/cs?searchtype=author&query=Yeragorla%2C+S+N">Sri Nithya Yeragorla</a>, <a href="/search/cs?searchtype=author&query=Rezapour%2C+R">Rezvaneh Rezapour</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08705v1-abstract-short" style="display: inline;"> Engaging the public with science is critical for a well-informed population. A popular method of scientific communication is documentaries. Once released, it can be difficult to assess the impact of such works on a large scale, due to the overhead required for in-depth audience feedback studies. In what follows, we overview our complementary approach to qualitative studies through quantitative imp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08705v1-abstract-full').style.display = 'inline'; document.getElementById('2502.08705v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.08705v1-abstract-full" style="display: none;"> Engaging the public with science is critical for a well-informed population. A popular method of scientific communication is documentaries. Once released, it can be difficult to assess the impact of such works on a large scale, due to the overhead required for in-depth audience feedback studies. In what follows, we overview our complementary approach to qualitative studies through quantitative impact and sentiment analysis of Amazon reviews for several scientific documentaries. In addition to developing a novel impact category taxonomy for this analysis, we release a dataset containing 1296 human-annotated sentences from 1043 Amazon reviews for six movies created in whole or part by a team of visualization designers who focus on cinematic presentations of scientific data. Using this data, we train and evaluate several machine learning and large language models, discussing their effectiveness and possible generalizability for documentaries beyond those focused on for this work. Themes are also extracted from our annotated dataset which, along with our large language model analysis, demonstrate a measure of the ability of scientific documentaries to engage with the public. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.08705v1-abstract-full').style.display = 'none'; document.getElementById('2502.08705v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Draft accepted to WebSci 2025, 10 pages, 3 figures, 6 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07829">arXiv:2502.07829</a> <span> [<a href="https://arxiv.org/pdf/2502.07829">pdf</a>, <a href="https://arxiv.org/format/2502.07829">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Preference Alignment on Diffusion Model: A Comprehensive Survey for Image Generation and Editing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wu%2C+S">Sihao Wu</a>, <a href="/search/cs?searchtype=author&query=Si%2C+X">Xiaonan Si</a>, <a href="/search/cs?searchtype=author&query=Xing%2C+C">Chi Xing</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jianhong Wang</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+G">Gaojie Jin</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+G">Guangliang Cheng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lijun Zhang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+X">Xiaowei Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07829v1-abstract-short" style="display: inline;"> The integration of preference alignment with diffusion models (DMs) has emerged as a transformative approach to enhance image generation and editing capabilities. Although integrating diffusion models with preference alignment strategies poses significant challenges for novices at this intersection, comprehensive and systematic reviews of this subject are still notably lacking. To bridge this gap,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07829v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07829v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07829v1-abstract-full" style="display: none;"> The integration of preference alignment with diffusion models (DMs) has emerged as a transformative approach to enhance image generation and editing capabilities. Although integrating diffusion models with preference alignment strategies poses significant challenges for novices at this intersection, comprehensive and systematic reviews of this subject are still notably lacking. To bridge this gap, this paper extensively surveys preference alignment with diffusion models in image generation and editing. First, we systematically review cutting-edge optimization techniques such as reinforcement learning with human feedback (RLHF), direct preference optimization (DPO), and others, highlighting their pivotal role in aligning preferences with DMs. Then, we thoroughly explore the applications of aligning preferences with DMs in autonomous driving, medical imaging, robotics, and more. Finally, we comprehensively discuss the challenges of preference alignment with DMs. To our knowledge, this is the first survey centered on preference alignment with DMs, providing insights to drive future innovation in this dynamic area. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07829v1-abstract-full').style.display = 'none'; document.getElementById('2502.07829v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07381">arXiv:2502.07381</a> <span> [<a href="https://arxiv.org/pdf/2502.07381">pdf</a>, <a href="https://arxiv.org/format/2502.07381">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Spatial Degradation-Aware and Temporal Consistent Diffusion Model for Compressed Video Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=An%2C+H">Hongyu An</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinfeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+S">Shijie Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Li Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07381v2-abstract-short" style="display: inline;"> Due to limitations of storage and bandwidth, videos stored and transmitted on the Internet are usually low-quality with low-resolution and compression noise. Although video super-resolution (VSR) is an efficient technique to enhance video resolution, relatively VSR methods focus on compressed videos. Directly applying general VSR approaches leads to the failure of improving practical videos, espec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07381v2-abstract-full').style.display = 'inline'; document.getElementById('2502.07381v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07381v2-abstract-full" style="display: none;"> Due to limitations of storage and bandwidth, videos stored and transmitted on the Internet are usually low-quality with low-resolution and compression noise. Although video super-resolution (VSR) is an efficient technique to enhance video resolution, relatively VSR methods focus on compressed videos. Directly applying general VSR approaches leads to the failure of improving practical videos, especially when frames are highly compressed at a low bit rate. Recently, diffusion models have achieved superior performance in low-level visual tasks, and their high-realism generation capability enables them to be applied in VSR. To synthesize more compression-lost details and refine temporal consistency, we propose a novel Spatial Degradation-Aware and Temporal Consistent (SDATC) diffusion model for compressed VSR. Specifically, we introduce a distortion Control module (DCM) to modulate diffusion model inputs and guide the generation. Next, the diffusion model executes the denoising process for texture generation with fine-tuned spatial prompt-based compression-aware module (PCAM) and spatio-temporal attention module (STAM). PCAM extracts features to encode specific compression information dynamically. STAM extends the spatial attention mechanism to a spatio-temporal dimension for capturing temporal correlation. Extensive experimental results on benchmark datasets demonstrate the effectiveness of the proposed modules in enhancing compressed videos. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07381v2-abstract-full').style.display = 'none'; document.getElementById('2502.07381v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07345">arXiv:2502.07345</a> <span> [<a href="https://arxiv.org/pdf/2502.07345">pdf</a>, <a href="https://arxiv.org/format/2502.07345">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> Advanced Zero-Shot Text-to-Speech for Background Removal and Preservation with Controllable Masked Speech Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Leying Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wangyou Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhengyang Chen</a>, <a href="/search/cs?searchtype=author&query=Qian%2C+Y">Yanmin Qian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07345v1-abstract-short" style="display: inline;"> The acoustic background plays a crucial role in natural conversation. It provides context and helps listeners understand the environment, but a strong background makes it difficult for listeners to understand spoken words. The appropriate handling of these backgrounds is situation-dependent: Although it may be necessary to remove background to ensure speech clarity, preserving the background is so… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07345v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07345v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07345v1-abstract-full" style="display: none;"> The acoustic background plays a crucial role in natural conversation. It provides context and helps listeners understand the environment, but a strong background makes it difficult for listeners to understand spoken words. The appropriate handling of these backgrounds is situation-dependent: Although it may be necessary to remove background to ensure speech clarity, preserving the background is sometimes crucial to maintaining the contextual integrity of the speech. Despite recent advancements in zero-shot Text-to-Speech technologies, current systems often struggle with speech prompts containing backgrounds. To address these challenges, we propose a Controllable Masked Speech Prediction strategy coupled with a dual-speaker encoder, utilizing a task-related control signal to guide the prediction of dual background removal and preservation targets. Experimental results demonstrate that our approach enables precise control over the removal or preservation of background across various acoustic conditions and exhibits strong generalization capabilities in unseen scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07345v1-abstract-full').style.display = 'none'; document.getElementById('2502.07345v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICASSP 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07288">arXiv:2502.07288</a> <span> [<a href="https://arxiv.org/pdf/2502.07288">pdf</a>, <a href="https://arxiv.org/format/2502.07288">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> KPIs 2024 Challenge: Advancing Glomerular Segmentation from Patch- to Slide-Level </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Deng%2C+R">Ruining Deng</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+T">Tianyuan Yao</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yucheng Tang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Junlin Guo</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+S">Siqi Lu</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+J">Juming Xiong</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+L">Lining Yu</a>, <a href="/search/cs?searchtype=author&query=Cap%2C+Q+H">Quan Huu Cap</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+P">Pengzhou Cai</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+L">Libin Lan</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Ze Zhao</a>, <a href="/search/cs?searchtype=author&query=Galdran%2C+A">Adrian Galdran</a>, <a href="/search/cs?searchtype=author&query=Kumar%2C+A">Amit Kumar</a>, <a href="/search/cs?searchtype=author&query=Deotale%2C+G">Gunjan Deotale</a>, <a href="/search/cs?searchtype=author&query=Das%2C+D+K">Dev Kumar Das</a>, <a href="/search/cs?searchtype=author&query=Paik%2C+I">Inyoung Paik</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Joonho Lee</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+G">Geongyu Lee</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yujia Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wangkai Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhaoyang Li</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+X">Xuege Hou</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zeyuan Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shengjin Wang</a>, <a href="/search/cs?searchtype=author&query=Fischer%2C+M">Maximilian Fischer</a> , et al. (22 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07288v1-abstract-short" style="display: inline;"> Chronic kidney disease (CKD) is a major global health issue, affecting over 10% of the population and causing significant mortality. While kidney biopsy remains the gold standard for CKD diagnosis and treatment, the lack of comprehensive benchmarks for kidney pathology segmentation hinders progress in the field. To address this, we organized the Kidney Pathology Image Segmentation (KPIs) Challenge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07288v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07288v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07288v1-abstract-full" style="display: none;"> Chronic kidney disease (CKD) is a major global health issue, affecting over 10% of the population and causing significant mortality. While kidney biopsy remains the gold standard for CKD diagnosis and treatment, the lack of comprehensive benchmarks for kidney pathology segmentation hinders progress in the field. To address this, we organized the Kidney Pathology Image Segmentation (KPIs) Challenge, introducing a dataset that incorporates preclinical rodent models of CKD with over 10,000 annotated glomeruli from 60+ Periodic Acid Schiff (PAS)-stained whole slide images. The challenge includes two tasks, patch-level segmentation and whole slide image segmentation and detection, evaluated using the Dice Similarity Coefficient (DSC) and F1-score. By encouraging innovative segmentation methods that adapt to diverse CKD models and tissue conditions, the KPIs Challenge aims to advance kidney pathology analysis, establish new benchmarks, and enable precise, large-scale quantification for disease research and diagnosis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07288v1-abstract-full').style.display = 'none'; document.getElementById('2502.07288v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.07164">arXiv:2502.07164</a> <span> [<a href="https://arxiv.org/pdf/2502.07164">pdf</a>, <a href="https://arxiv.org/format/2502.07164">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Does Training on Synthetic Data Make Models Less Robust? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lingze Zhang</a>, <a href="/search/cs?searchtype=author&query=Pavlick%2C+E">Ellie Pavlick</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07164v1-abstract-short" style="display: inline;"> An increasingly common practice is to train large language models (LLMs) using synthetic data. Often this synthetic data is produced by the same or similar LLMs as those it is being used to train. This raises the question of whether the synthetic data might in fact exacerbate certain "blindspots" by reinforcing heuristics that the LLM already encodes. In this paper, we conduct simulated experiment… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07164v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07164v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07164v1-abstract-full" style="display: none;"> An increasingly common practice is to train large language models (LLMs) using synthetic data. Often this synthetic data is produced by the same or similar LLMs as those it is being used to train. This raises the question of whether the synthetic data might in fact exacerbate certain "blindspots" by reinforcing heuristics that the LLM already encodes. In this paper, we conduct simulated experiments on the natural language inference (NLI) task with Llama-2-7B-hf models. We use MultiNLI as the general task and HANS, a targeted evaluation set designed to measure the presence of specific heuristic strategies for NLI, as our "blindspot" task. Our goal is to determine whether performance disparities between the general and blind spot tasks emerge. Our results indicate that synthetic data does not reinforce blindspots in the way we expected. Specifically, we see that, while fine-tuning with synthetic data doesn't necessarily reduce the use of the heuristic, it also does not make it worse as we hypothesized. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07164v1-abstract-full').style.display = 'none'; document.getElementById('2502.07164v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06887">arXiv:2502.06887</a> <span> [<a href="https://arxiv.org/pdf/2502.06887">pdf</a>, <a href="https://arxiv.org/ps/2502.06887">ps</a>, <a href="https://arxiv.org/format/2502.06887">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gradient Based Method for the Fusion of Lattice Quantizers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Liyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+H">Hanzhong Cao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiaheng Li</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+M">Minyang Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06887v1-abstract-short" style="display: inline;"> In practical applications, lattice quantizers leverage discrete lattice points to approximate arbitrary points in the lattice. An effective lattice quantizer significantly enhances both the accuracy and efficiency of these approximations. In the context of high-dimensional lattice quantization, previous work proposed utilizing low-dimensional optimal lattice quantizers and addressed the challenge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06887v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06887v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06887v1-abstract-full" style="display: none;"> In practical applications, lattice quantizers leverage discrete lattice points to approximate arbitrary points in the lattice. An effective lattice quantizer significantly enhances both the accuracy and efficiency of these approximations. In the context of high-dimensional lattice quantization, previous work proposed utilizing low-dimensional optimal lattice quantizers and addressed the challenge of determining the optimal length ratio in orthogonal splicing. Notably, it was demonstrated that fixed length ratios and orthogonality yield suboptimal results when combining low-dimensional lattices. Building on this foundation, another approach employed gradient descent to identify optimal lattices, which inspired us to explore the use of neural networks to discover matrices that outperform those obtained from orthogonal splicing methods. We propose two novel approaches to tackle this problem: the Household Algorithm and the Matrix Exp Algorithm. Our results indicate that both the Household Algorithm and the Matrix Exp Algorithm achieve improvements in lattice quantizers across dimensions 13, 15, 17 to 19, 21, and 22. Moreover, the Matrix Exp Algorithm demonstrates superior efficacy in high-dimensional settings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06887v1-abstract-full').style.display = 'none'; document.getElementById('2502.06887v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06852">arXiv:2502.06852</a> <span> [<a href="https://arxiv.org/pdf/2502.06852">pdf</a>, <a href="https://arxiv.org/format/2502.06852">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> EAP-GP: Mitigating Saturation Effect in Gradient-based Automated Circuit Identification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lin Zhang</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+W">Wenshuo Dong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zhuoran Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+S">Shu Yang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+L">Lijie Hu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+N">Ninghao Liu</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+P">Pan Zhou</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+D">Di Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06852v1-abstract-short" style="display: inline;"> Understanding the internal mechanisms of transformer-based language models remains challenging. Mechanistic interpretability based on circuit discovery aims to reverse engineer neural networks by analyzing their internal processes at the level of computational subgraphs. In this paper, we revisit existing gradient-based circuit identification methods and find that their performance is either affec… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06852v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06852v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06852v1-abstract-full" style="display: none;"> Understanding the internal mechanisms of transformer-based language models remains challenging. Mechanistic interpretability based on circuit discovery aims to reverse engineer neural networks by analyzing their internal processes at the level of computational subgraphs. In this paper, we revisit existing gradient-based circuit identification methods and find that their performance is either affected by the zero-gradient problem or saturation effects, where edge attribution scores become insensitive to input changes, resulting in noisy and unreliable attribution evaluations for circuit components. To address the saturation effect, we propose Edge Attribution Patching with GradPath (EAP-GP), EAP-GP introduces an integration path, starting from the input and adaptively following the direction of the difference between the gradients of corrupted and clean inputs to avoid the saturated region. This approach enhances attribution reliability and improves the faithfulness of circuit identification. We evaluate EAP-GP on 6 datasets using GPT-2 Small, GPT-2 Medium, and GPT-2 XL. Experimental results demonstrate that EAP-GP outperforms existing methods in circuit faithfulness, achieving improvements up to 17.7%. Comparisons with manually annotated ground-truth circuits demonstrate that EAP-GP achieves precision and recall comparable to or better than previous approaches, highlighting its effectiveness in identifying accurate circuits. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06852v1-abstract-full').style.display = 'none'; document.getElementById('2502.06852v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06371">arXiv:2502.06371</a> <span> [<a href="https://arxiv.org/pdf/2502.06371">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Simulation as Reality? The Effectiveness of LLM-Generated Data in Open-ended Question Assessment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Long Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+M">Meng Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+W+L">Wei Lin Wang</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+Y">Yu Luo</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06371v1-abstract-short" style="display: inline;"> The advancement of Artificial Intelligence (AI) has created opportunities for e-learning, particularly in automated assessment systems that reduce educators' workload and provide timely feedback to students. However, developing effective AI-based assessment tools remains challenging due to the substantial resources required for collecting and annotating real student data. This study investigates t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06371v1-abstract-full').style.display = 'inline'; document.getElementById('2502.06371v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06371v1-abstract-full" style="display: none;"> The advancement of Artificial Intelligence (AI) has created opportunities for e-learning, particularly in automated assessment systems that reduce educators' workload and provide timely feedback to students. However, developing effective AI-based assessment tools remains challenging due to the substantial resources required for collecting and annotating real student data. This study investigates the potential and gap of simulative data to address this limitation. Through a two-phase experimental study, we examined the effectiveness and gap of Large Language Model generated synthetic data in training educational assessment systems. Our findings reveal that while simulative data demonstrates promising results in training automated assessment models, outperforming state-of-the-art GPT-4o in most question types, its effectiveness has notable limitations. Specifically, models trained on synthetic data show excellent performance in simulated environment but need progress when applied to real-world scenarios. This performance gap highlights the limitations of only using synthetic data in controlled experimental settings for AI training. The absence of real-world noise and biases, which are also present in over-processed real-world data, contributes to this limitation. We recommend that future development of automated assessment agents and other AI tools should incorporate a mixture of synthetic and real-world data, or introduce more realistic noise and biases patterns, rather than relying solely on synthetic or over-processed data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06371v1-abstract-full').style.display = 'none'; document.getElementById('2502.06371v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.06181">arXiv:2502.06181</a> <span> [<a href="https://arxiv.org/pdf/2502.06181">pdf</a>, <a href="https://arxiv.org/format/2502.06181">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> CANeRV: Content Adaptive Neural Representation for Video Compression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+L">Lv Tang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+J">Jun Zhu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xinfeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Li Zhang</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+S">Siwei Ma</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Q">Qingming Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.06181v2-abstract-short" style="display: inline;"> Recent advances in video compression introduce implicit neural representation (INR) based methods, which effectively capture global dependencies and characteristics of entire video sequences. Unlike traditional and deep learning based approaches, INR-based methods optimize network parameters from a global perspective, resulting in superior compression potential. However, most current INR methods u… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06181v2-abstract-full').style.display = 'inline'; document.getElementById('2502.06181v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.06181v2-abstract-full" style="display: none;"> Recent advances in video compression introduce implicit neural representation (INR) based methods, which effectively capture global dependencies and characteristics of entire video sequences. Unlike traditional and deep learning based approaches, INR-based methods optimize network parameters from a global perspective, resulting in superior compression potential. However, most current INR methods utilize a fixed and uniform network architecture across all frames, limiting their adaptability to dynamic variations within and between video sequences. This often leads to suboptimal compression outcomes as these methods struggle to capture the distinct nuances and transitions in video content. To overcome these challenges, we propose Content Adaptive Neural Representation for Video Compression (CANeRV), an innovative INR-based video compression network that adaptively conducts structure optimisation based on the specific content of each video sequence. To better capture dynamic information across video sequences, we propose a dynamic sequence-level adjustment (DSA). Furthermore, to enhance the capture of dynamics between frames within a sequence, we implement a dynamic frame-level adjustment (DFA). {Finally, to effectively capture spatial structural information within video frames, thereby enhancing the detail restoration capabilities of CANeRV, we devise a structure level hierarchical structural adaptation (HSA).} Experimental results demonstrate that CANeRV can outperform both H.266/VVC and state-of-the-art INR-based video compression techniques across diverse video datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.06181v2-abstract-full').style.display = 'none'; document.getElementById('2502.06181v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05213">arXiv:2502.05213</a> <span> [<a href="https://arxiv.org/pdf/2502.05213">pdf</a>, <a href="https://arxiv.org/format/2502.05213">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> DERMARK: A Dynamic, Efficient and Robust Multi-bit Watermark for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lin%2C+Q">Qihao Lin</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+C">Chen Tang</a>, <a href="/search/cs?searchtype=author&query=zhang%2C+L">Lan zhang</a>, <a href="/search/cs?searchtype=author&query=zhang%2C+J">Junyang zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiangyang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05213v1-abstract-short" style="display: inline;"> Well-trained large language models (LLMs) present significant risks, including potential malicious use and copyright infringement. Current studies aim to trace the distribution of LLM-generated texts by implicitly embedding watermarks. Among these, the single-bit watermarking method can only determine whether a given text was generated by an LLM. In contrast, the multi-bit watermarking method embe… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05213v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05213v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05213v1-abstract-full" style="display: none;"> Well-trained large language models (LLMs) present significant risks, including potential malicious use and copyright infringement. Current studies aim to trace the distribution of LLM-generated texts by implicitly embedding watermarks. Among these, the single-bit watermarking method can only determine whether a given text was generated by an LLM. In contrast, the multi-bit watermarking method embeds richer information into the generated text, which can identify which LLM generated and distributed a given text to which user. However, existing efforts embed the multi-bit watermark directly into the generated text without accounting for its watermarking capacity. This approach can result in embedding failures when the text's watermarking capacity is insufficient. In this paper, we derive the watermark embedding distribution based on the logits of LLMs and propose a formal inequality to segment the text optimally for watermark embedding. Building on this foundation, we propose DERMARK, a dynamic, efficient, and robust multi-bit watermarking method. DERMARK divides the text into segments of varying lengths for each bit embedding, adaptively matching the text's capacity. It achieves this with negligible overhead and robust performance against text editing by minimizing watermark extraction loss. Comprehensive experiments demonstrate that, compared to the SOTA method, our method reduces the number of tokens required for embedding each bit by 20\%, reduces watermark embedding time by 50\%, and is robust to text editing and watermark erasure attacks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05213v1-abstract-full').style.display = 'none'; document.getElementById('2502.05213v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 15 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05190">arXiv:2502.05190</a> <span> [<a href="https://arxiv.org/pdf/2502.05190">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Geophysics">physics.geo-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Physics-Trained Neural Network as Inverse Problem Solver for Potential Fields: An Example of Downward Continuation between Arbitrary Surfaces </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jing Sun</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Lu Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Liang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05190v1-abstract-short" style="display: inline;"> Downward continuation is a critical task in potential field processing, including gravity and magnetic fields, which aims to transfer data from one observation surface to another that is closer to the source of the field. Its effectiveness directly impacts the success of detecting and highlighting subsurface anomalous sources. We treat downward continuation as an inverse problem that relies on sol… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05190v1-abstract-full').style.display = 'inline'; document.getElementById('2502.05190v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05190v1-abstract-full" style="display: none;"> Downward continuation is a critical task in potential field processing, including gravity and magnetic fields, which aims to transfer data from one observation surface to another that is closer to the source of the field. Its effectiveness directly impacts the success of detecting and highlighting subsurface anomalous sources. We treat downward continuation as an inverse problem that relies on solving a forward problem defined by the formula for upward continuation, and we propose a new physics-trained deep neural network (DNN)-based solution for this task. We hard-code the upward continuation process into the DNN's learning framework, where the DNN itself learns to act as the inverse problem solver and can perform downward continuation without ever being shown any ground truth data. We test the proposed method on both synthetic magnetic data and real-world magnetic data from West Antarctica. The preliminary results demonstrate its effectiveness through comparison with selected benchmarks, opening future avenues for the combined use of DNNs and established geophysical theories to address broader potential field inverse problems, such as density and geometry modelling. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05190v1-abstract-full').style.display = 'none'; document.getElementById('2502.05190v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04471">arXiv:2502.04471</a> <span> [<a href="https://arxiv.org/pdf/2502.04471">pdf</a>, <a href="https://arxiv.org/format/2502.04471">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Identifying Flaky Tests in Quantum Code: A Machine Learning Approach </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kaur%2C+K">Khushdeep Kaur</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+D">Dongchan Kim</a>, <a href="/search/cs?searchtype=author&query=Jamshidi%2C+A">Ainaz Jamshidi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04471v1-abstract-short" style="display: inline;"> Testing and debugging quantum software pose significant challenges due to the inherent complexities of quantum mechanics, such as superposition and entanglement. One challenge is indeterminacy, a fundamental characteristic of quantum systems, which increases the likelihood of flaky tests in quantum programs. To the best of our knowledge, there is a lack of comprehensive studies on quantum flakines… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04471v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04471v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04471v1-abstract-full" style="display: none;"> Testing and debugging quantum software pose significant challenges due to the inherent complexities of quantum mechanics, such as superposition and entanglement. One challenge is indeterminacy, a fundamental characteristic of quantum systems, which increases the likelihood of flaky tests in quantum programs. To the best of our knowledge, there is a lack of comprehensive studies on quantum flakiness in the existing literature. In this paper, we present a novel machine learning platform that leverages multiple machine learning models to automatically detect flaky tests in quantum programs. Our evaluation shows that the extreme gradient boosting and decision tree-based models outperform other models (i.e., random forest, k-nearest neighbors, and support vector machine), achieving the highest F1 score and Matthews Correlation Coefficient in a balanced dataset and an imbalanced dataset, respectively. Furthermore, we expand the currently limited dataset for researchers interested in quantum flaky tests. In the future, we plan to explore the development of unsupervised learning techniques to detect and classify quantum flaky tests more effectively. These advancements aim to improve the reliability and robustness of quantum software testing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04471v1-abstract-full').style.display = 'none'; document.getElementById('2502.04471v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 1 figure, accepted by Q-SANER 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04428">arXiv:2502.04428</a> <span> [<a href="https://arxiv.org/pdf/2502.04428">pdf</a>, <a href="https://arxiv.org/format/2502.04428">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Confident or Seek Stronger: Exploring Uncertainty-Based On-device LLM Routing From Benchmarking to Generalization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chuang%2C+Y">Yu-Neng Chuang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+L">Leisheng Yu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Guanchu Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lizhe Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zirui Liu</a>, <a href="/search/cs?searchtype=author&query=Cai%2C+X">Xuanting Cai</a>, <a href="/search/cs?searchtype=author&query=Sui%2C+Y">Yang Sui</a>, <a href="/search/cs?searchtype=author&query=Braverman%2C+V">Vladimir Braverman</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xia Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04428v1-abstract-short" style="display: inline;"> Large language models (LLMs) are increasingly deployed and democratized on edge devices. To improve the efficiency of on-device deployment, small language models (SLMs) are often adopted due to their efficient decoding latency and reduced energy consumption. However, these SLMs often generate inaccurate responses when handling complex queries. One promising solution is uncertainty-based SLM routin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04428v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04428v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04428v1-abstract-full" style="display: none;"> Large language models (LLMs) are increasingly deployed and democratized on edge devices. To improve the efficiency of on-device deployment, small language models (SLMs) are often adopted due to their efficient decoding latency and reduced energy consumption. However, these SLMs often generate inaccurate responses when handling complex queries. One promising solution is uncertainty-based SLM routing, offloading high-stakes queries to stronger LLMs when resulting in low-confidence responses on SLM. This follows the principle of "If you lack confidence, seek stronger support" to enhance reliability. Relying on more powerful LLMs is yet effective but increases invocation costs. Therefore, striking a routing balance between efficiency and efficacy remains a critical challenge. Additionally, efficiently generalizing the routing strategy to new datasets remains under-explored. In this paper, we conduct a comprehensive investigation into benchmarking and generalization of uncertainty-driven routing strategies from SLMs to LLMs over 1500+ settings. Our findings highlight: First, uncertainty-correctness alignment in different uncertainty quantification (UQ) methods significantly impacts routing performance. Second, uncertainty distributions depend more on both the specific SLM and the chosen UQ method, rather than downstream data. Building on the insight, we propose a calibration data construction instruction pipeline and open-source a constructed hold-out set to enhance routing generalization on new downstream scenarios. The experimental results indicate calibration data effectively bootstraps routing performance without any new data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04428v1-abstract-full').style.display = 'none'; document.getElementById('2502.04428v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04295">arXiv:2502.04295</a> <span> [<a href="https://arxiv.org/pdf/2502.04295">pdf</a>, <a href="https://arxiv.org/format/2502.04295">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Beyond Prompt Content: Enhancing LLM Performance via Content-Format Integrated Prompt Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yuanye Liu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jiahang Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L+L">Li Lyna Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Q">Qi Chen</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+X">Xuan Feng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yang Chen</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zhongxin Guo</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yuqing Yang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+P">Peng Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04295v2-abstract-short" style="display: inline;"> Large Language Models (LLMs) have shown significant capability across various tasks, with their real-world effectiveness often driven by prompt design. While recent research has focused on optimizing prompt content, the role of prompt formatting, a critical but often overlooked dimension, has received limited systematic investigation. In this paper, we introduce Content-Format Integrated Prompt Op… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04295v2-abstract-full').style.display = 'inline'; document.getElementById('2502.04295v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04295v2-abstract-full" style="display: none;"> Large Language Models (LLMs) have shown significant capability across various tasks, with their real-world effectiveness often driven by prompt design. While recent research has focused on optimizing prompt content, the role of prompt formatting, a critical but often overlooked dimension, has received limited systematic investigation. In this paper, we introduce Content-Format Integrated Prompt Optimization (CFPO), an innovative methodology that jointly optimizes both prompt content and formatting through an iterative refinement process. CFPO leverages natural language mutations to explore content variations and employs a dynamic format exploration strategy that systematically evaluates diverse format options. Our extensive evaluations across multiple tasks and open-source LLMs demonstrate that CFPO demonstrates measurable performance improvements compared to content-only optimization methods. This highlights the importance of integrated content-format optimization and offers a practical, model-agnostic approach to enhancing LLM performance. Code is available at https://github.com/HenryLau7/CFPO. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04295v2-abstract-full').style.display = 'none'; document.getElementById('2502.04295v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04093">arXiv:2502.04093</a> <span> [<a href="https://arxiv.org/pdf/2502.04093">pdf</a>, <a href="https://arxiv.org/format/2502.04093">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> PSZ: Enhancing the SZ Scientific Lossy Compressor With Progressive Data Retrieval </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yang%2C+Z">Zhuoxun Yang</a>, <a href="/search/cs?searchtype=author&query=Di%2C+S">Sheng Di</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Longtao Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ruoyu Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Ximiao Li</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jiajun Huang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jinyang Liu</a>, <a href="/search/cs?searchtype=author&query=Cappello%2C+F">Franck Cappello</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+K">Kai Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04093v2-abstract-short" style="display: inline;"> Compression is a crucial solution for data reduction in modern scientific applications due to the exponential growth of data from simulations, experiments, and observations. Compression with progressive retrieval capability allows users to access coarse approximations of data quickly and then incrementally refine these approximations to higher fidelity. Existing progressive compression solutions s… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04093v2-abstract-full').style.display = 'inline'; document.getElementById('2502.04093v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04093v2-abstract-full" style="display: none;"> Compression is a crucial solution for data reduction in modern scientific applications due to the exponential growth of data from simulations, experiments, and observations. Compression with progressive retrieval capability allows users to access coarse approximations of data quickly and then incrementally refine these approximations to higher fidelity. Existing progressive compression solutions suffer from low reduction ratios or high operation costs, effectively undermining the approach's benefits. In this paper, we propose the first-ever interpolation-based progressive lossy compression solution that has both high reduction ratios and low operation costs. The interpolation-based algorithm has been verified as one of the best for scientific data reduction, but previously no effort exists to make it support progressive retrieval. Our contributions are three-fold: (1) We thoroughly analyze the error characteristics of the interpolation algorithm and propose our solution IPComp with multi-level bitplane and predictive coding. (2) We derive optimized strategies toward minimum data retrieval under different fidelity levels indicated by users through error bounds and bitrates. (3) We evaluate the proposed solution using six real-world datasets from four diverse domains. Experimental results demonstrate our solution archives up to $487\%$ higher compression ratios and $698\%$ faster speed than other state-of-the-art progressive compressors, and reduces the data volume for retrieval by up to $83\%$ compared to baselines under the same error bound, and reduces the error by up to $99\%$ under the same bitrate. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04093v2-abstract-full').style.display = 'none'; document.getElementById('2502.04093v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03496">arXiv:2502.03496</a> <span> [<a href="https://arxiv.org/pdf/2502.03496">pdf</a>, <a href="https://arxiv.org/format/2502.03496">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Graphics">cs.GR</span> </div> </div> <p class="title is-5 mathjax"> FreqPrior: Improving Video Diffusion Models with Frequency Filtering Gaussian Noise </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yuan%2C+Y">Yunlong Yuan</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Y">Yuanfan Guo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+C">Chunwei Wang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wei Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+H">Hang Xu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Li Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03496v1-abstract-short" style="display: inline;"> Text-driven video generation has advanced significantly due to developments in diffusion models. Beyond the training and sampling phases, recent studies have investigated noise priors of diffusion models, as improved noise priors yield better generation results. One recent approach employs the Fourier transform to manipulate noise, marking the initial exploration of frequency operations in this co… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03496v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03496v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03496v1-abstract-full" style="display: none;"> Text-driven video generation has advanced significantly due to developments in diffusion models. Beyond the training and sampling phases, recent studies have investigated noise priors of diffusion models, as improved noise priors yield better generation results. One recent approach employs the Fourier transform to manipulate noise, marking the initial exploration of frequency operations in this context. However, it often generates videos that lack motion dynamics and imaging details. In this work, we provide a comprehensive theoretical analysis of the variance decay issue present in existing methods, contributing to the loss of details and motion dynamics. Recognizing the critical impact of noise distribution on generation quality, we introduce FreqPrior, a novel noise initialization strategy that refines noise in the frequency domain. Our method features a novel filtering technique designed to address different frequency signals while maintaining the noise prior distribution that closely approximates a standard Gaussian distribution. Additionally, we propose a partial sampling process by perturbing the latent at an intermediate timestep during finding the noise prior, significantly reducing inference time without compromising quality. Extensive experiments on VBench demonstrate that our method achieves the highest scores in both quality and semantic assessments, resulting in the best overall total score. These results highlight the superiority of our proposed noise prior. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03496v1-abstract-full').style.display = 'none'; document.getElementById('2502.03496v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">ICLR 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.03493">arXiv:2502.03493</a> <span> [<a href="https://arxiv.org/pdf/2502.03493">pdf</a>, <a href="https://arxiv.org/format/2502.03493">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MetaFE-DE: Learning Meta Feature Embedding for Depth Estimation from Monocular Endoscopic Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lu%2C+D">Dawei Lu</a>, <a href="/search/cs?searchtype=author&query=Xiao%2C+D">Deqiang Xiao</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+D">Danni Ai</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+J">Jingfan Fan</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+T">Tianyu Fu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+Y">Yucong Lin</a>, <a href="/search/cs?searchtype=author&query=Song%2C+H">Hong Song</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+X">Xujiong Ye</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+J">Jian Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.03493v1-abstract-short" style="display: inline;"> Depth estimation from monocular endoscopic images presents significant challenges due to the complexity of endoscopic surgery, such as irregular shapes of human soft tissues, as well as variations in lighting conditions. Existing methods primarily estimate the depth information from RGB images directly, and often surffer the limited interpretability and accuracy. Given that RGB and depth images ar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03493v1-abstract-full').style.display = 'inline'; document.getElementById('2502.03493v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.03493v1-abstract-full" style="display: none;"> Depth estimation from monocular endoscopic images presents significant challenges due to the complexity of endoscopic surgery, such as irregular shapes of human soft tissues, as well as variations in lighting conditions. Existing methods primarily estimate the depth information from RGB images directly, and often surffer the limited interpretability and accuracy. Given that RGB and depth images are two views of the same endoscopic surgery scene, in this paper, we introduce a novel concept referred as ``meta feature embedding (MetaFE)", in which the physical entities (e.g., tissues and surgical instruments) of endoscopic surgery are represented using the shared features that can be alternatively decoded into RGB or depth image. With this concept, we propose a two-stage self-supervised learning paradigm for the monocular endoscopic depth estimation. In the first stage, we propose a temporal representation learner using diffusion models, which are aligned with the spatial information through the cross normalization to construct the MetaFE. In the second stage, self-supervised monocular depth estimation with the brightness calibration is applied to decode the meta features into the depth image. Extensive evaluation on diverse endoscopic datasets demonstrates that our approach outperforms the state-of-the-art method in depth estimation, achieving superior accuracy and generalization. The source code will be publicly available. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.03493v1-abstract-full').style.display = 'none'; document.getElementById('2502.03493v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02444">arXiv:2502.02444</a> <span> [<a href="https://arxiv.org/pdf/2502.02444">pdf</a>, <a href="https://arxiv.org/format/2502.02444">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Generative Psycho-Lexical Approach for Constructing Value Systems in Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ye%2C+H">Haoran Ye</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+T">Tianze Zhang</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Yuhang Xie</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Liyuan Zhang</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yuanyi Ren</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+X">Xin Zhang</a>, <a href="/search/cs?searchtype=author&query=Song%2C+G">Guojie Song</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.02444v2-abstract-short" style="display: inline;"> Values are core drivers of individual and collective perception, cognition, and behavior. Value systems, such as Schwartz's Theory of Basic Human Values, delineate the hierarchy and interplay among these values, enabling cross-disciplinary investigations into decision-making and societal dynamics. Recently, the rise of Large Language Models (LLMs) has raised concerns regarding their elusive intrin… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02444v2-abstract-full').style.display = 'inline'; document.getElementById('2502.02444v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.02444v2-abstract-full" style="display: none;"> Values are core drivers of individual and collective perception, cognition, and behavior. Value systems, such as Schwartz's Theory of Basic Human Values, delineate the hierarchy and interplay among these values, enabling cross-disciplinary investigations into decision-making and societal dynamics. Recently, the rise of Large Language Models (LLMs) has raised concerns regarding their elusive intrinsic values. Despite growing efforts in evaluating, understanding, and aligning LLM values, a psychologically grounded LLM value system remains underexplored. This study addresses the gap by introducing the Generative Psycho-Lexical Approach (GPLA), a scalable, adaptable, and theoretically informed method for constructing value systems. Leveraging GPLA, we propose a psychologically grounded five-factor value system tailored for LLMs. For systematic validation, we present three benchmarking tasks that integrate psychological principles with cutting-edge AI priorities. Our results reveal that the proposed value system meets standard psychological criteria, better captures LLM values, improves LLM safety prediction, and enhances LLM alignment, when compared to the canonical Schwartz's values. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.02444v2-abstract-full').style.display = 'none'; document.getElementById('2502.02444v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.01959">arXiv:2502.01959</a> <span> [<a href="https://arxiv.org/pdf/2502.01959">pdf</a>, <a href="https://arxiv.org/format/2502.01959">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MATCNN: Infrared and Visible Image Fusion Method Based on Multi-scale CNN with Attention Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jingjing Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Li Zhang</a>, <a href="/search/cs?searchtype=author&query=Zeng%2C+X">Xiaoyang Zeng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wanquan Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianhua Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.01959v1-abstract-short" style="display: inline;"> While attention-based approaches have shown considerable progress in enhancing image fusion and addressing the challenges posed by long-range feature dependencies, their efficacy in capturing local features is compromised by the lack of diverse receptive field extraction techniques. To overcome the shortcomings of existing fusion methods in extracting multi-scale local features and preserving glob… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01959v1-abstract-full').style.display = 'inline'; document.getElementById('2502.01959v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.01959v1-abstract-full" style="display: none;"> While attention-based approaches have shown considerable progress in enhancing image fusion and addressing the challenges posed by long-range feature dependencies, their efficacy in capturing local features is compromised by the lack of diverse receptive field extraction techniques. To overcome the shortcomings of existing fusion methods in extracting multi-scale local features and preserving global features, this paper proposes a novel cross-modal image fusion approach based on a multi-scale convolutional neural network with attention Transformer (MATCNN). MATCNN utilizes the multi-scale fusion module (MSFM) to extract local features at different scales and employs the global feature extraction module (GFEM) to extract global features. Combining the two reduces the loss of detail features and improves the ability of global feature representation. Simultaneously, an information mask is used to label pertinent details within the images, aiming to enhance the proportion of preserving significant information in infrared images and background textures in visible images in fused images. Subsequently, a novel optimization algorithm is developed, leveraging the mask to guide feature extraction through the integration of content, structural similarity index measurement, and global feature loss. Quantitative and qualitative evaluations are conducted across various datasets, revealing that MATCNN effectively highlights infrared salient targets, preserves additional details in visible images, and achieves better fusion results for cross-modal images. The code of MATCNN will be available at https://github.com/zhang3849/MATCNN.git. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.01959v1-abstract-full').style.display = 'none'; document.getElementById('2502.01959v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00855">arXiv:2502.00855</a> <span> [<a href="https://arxiv.org/pdf/2502.00855">pdf</a>, <a href="https://arxiv.org/format/2502.00855">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Psychometric-Based Evaluation for Theorem Proving with Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yongwang Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Long Zhang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+J">Jilin Hu</a>, <a href="/search/cs?searchtype=author&query=Luan%2C+X">Xiaokun Luan</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhiwei Xu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+F">Feng Yang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00855v1-abstract-short" style="display: inline;"> Large language models (LLMs) for formal theorem proving have become a prominent research focus. At present, the proving ability of these LLMs is mainly evaluated through proof pass rates on datasets such as miniF2F. However, this evaluation method overlooks the varying importance of theorems. As a result, it fails to highlight the real performance disparities between LLMs and leads to high evaluat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00855v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00855v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00855v1-abstract-full" style="display: none;"> Large language models (LLMs) for formal theorem proving have become a prominent research focus. At present, the proving ability of these LLMs is mainly evaluated through proof pass rates on datasets such as miniF2F. However, this evaluation method overlooks the varying importance of theorems. As a result, it fails to highlight the real performance disparities between LLMs and leads to high evaluation costs. This study proposes a psychometric-based evaluation method for theorem proving with LLMs, comprising two main components: Dataset Annotation and Adaptive Evaluation. First, we propose a metric calculation method to annotate the dataset with difficulty and discrimination metrics. Specifically, we annotate each theorem in the miniF2F dataset and grade them into varying difficulty levels according to the performance of LLMs, resulting in an enhanced dataset: miniF2F-Graded. Experimental results show that the difficulty grading in miniF2F-Graded better reflects the theorem difficulty perceived by LLMs. Secondly, we design an adaptive evaluation method to dynamically select the most suitable theorems for testing based on the annotated metrics and the real-time performance of LLMs. We apply this method to evaluate 10 LLMs. The results show that our method finely highlights the performance disparities between LLMs. It also reduces evaluation costs by using only 23% of the theorems in the dataset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00855v1-abstract-full').style.display = 'none'; document.getElementById('2502.00855v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00848">arXiv:2502.00848</a> <span> [<a href="https://arxiv.org/pdf/2502.00848">pdf</a>, <a href="https://arxiv.org/format/2502.00848">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RealRAG: Retrieval-augmented Realistic Image Generation via Self-reflective Contrastive Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lyu%2C+Y">Yuanhuiyi Lyu</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+X">Xu Zheng</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+L">Lutao Jiang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+Y">Yibo Yan</a>, <a href="/search/cs?searchtype=author&query=Zou%2C+X">Xin Zou</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+H">Huiyu Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Linfeng Zhang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xuming Hu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00848v1-abstract-short" style="display: inline;"> Recent text-to-image generative models, e.g., Stable Diffusion V3 and Flux, have achieved notable progress. However, these models are strongly restricted to their limited knowledge, a.k.a., their own fixed parameters, that are trained with closed datasets. This leads to significant hallucinations or distortions when facing fine-grained and unseen novel real-world objects, e.g., the appearance of t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00848v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00848v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00848v1-abstract-full" style="display: none;"> Recent text-to-image generative models, e.g., Stable Diffusion V3 and Flux, have achieved notable progress. However, these models are strongly restricted to their limited knowledge, a.k.a., their own fixed parameters, that are trained with closed datasets. This leads to significant hallucinations or distortions when facing fine-grained and unseen novel real-world objects, e.g., the appearance of the Tesla Cybertruck. To this end, we present the first real-object-based retrieval-augmented generation framework (RealRAG), which augments fine-grained and unseen novel object generation by learning and retrieving real-world images to overcome the knowledge gaps of generative models. Specifically, to integrate missing memory for unseen novel object generation, we train a reflective retriever by self-reflective contrastive learning, which injects the generator's knowledge into the sef-reflective negatives, ensuring that the retrieved augmented images compensate for the model's missing knowledge. Furthermore, the real-object-based framework integrates fine-grained visual knowledge for the generative models, tackling the distortion problem and improving the realism for fine-grained object generation. Our Real-RAG is superior in its modular application to all types of state-of-the-art text-to-image generative models and also delivers remarkable performance boosts with all of them, such as a gain of 16.18% FID score with the auto-regressive model on the Stanford Car benchmark. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00848v1-abstract-full').style.display = 'none'; document.getElementById('2502.00848v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00753">arXiv:2502.00753</a> <span> [<a href="https://arxiv.org/pdf/2502.00753">pdf</a>, <a href="https://arxiv.org/format/2502.00753">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Optimization and Control">math.OC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Mirror Descent Under Generalized Smoothness </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yu%2C+D">Dingzhi Yu</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+W">Wei Jiang</a>, <a href="/search/cs?searchtype=author&query=Wan%2C+Y">Yuanyu Wan</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lijun Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00753v1-abstract-short" style="display: inline;"> Smoothness is crucial for attaining fast rates in first-order optimization. However, many optimization problems in modern machine learning involve non-smooth objectives. Recent studies relax the smoothness assumption by allowing the Lipschitz constant of the gradient to grow with respect to the gradient norm, which accommodates a broad range of objectives in practice. Despite this progress, existi… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00753v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00753v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00753v1-abstract-full" style="display: none;"> Smoothness is crucial for attaining fast rates in first-order optimization. However, many optimization problems in modern machine learning involve non-smooth objectives. Recent studies relax the smoothness assumption by allowing the Lipschitz constant of the gradient to grow with respect to the gradient norm, which accommodates a broad range of objectives in practice. Despite this progress, existing generalizations of smoothness are restricted to Euclidean geometry with $\ell_2$-norm and only have theoretical guarantees for optimization in the Euclidean space. In this paper, we address this limitation by introducing a new $\ell*$-smoothness concept that measures the norm of Hessian in terms of a general norm and its dual, and establish convergence for mirror-descent-type algorithms, matching the rates under the classic smoothness. Notably, we propose a generalized self-bounding property that facilitates bounding the gradients via controlling suboptimality gaps, serving as a principal component for convergence analysis. Beyond deterministic optimization, we establish an anytime convergence for stochastic mirror descent based on a new bounded noise condition that encompasses the widely adopted bounded or affine noise assumptions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00753v1-abstract-full').style.display = 'none'; document.getElementById('2502.00753v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">59 pages, 2 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00725">arXiv:2502.00725</a> <span> [<a href="https://arxiv.org/pdf/2502.00725">pdf</a>, <a href="https://arxiv.org/format/2502.00725">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Understanding and Mitigating the High Computational Cost in Path Data Diffusion </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Shi%2C+D">Dingyuan Shi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lulu Zhang</a>, <a href="/search/cs?searchtype=author&query=Tong%2C+Y">Yongxin Tong</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+K">Ke Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00725v1-abstract-short" style="display: inline;"> Advancements in mobility services, navigation systems, and smart transportation technologies have made it possible to collect large amounts of path data. Modeling the distribution of this path data, known as the Path Generation (PG) problem, is crucial for understanding urban mobility patterns and developing intelligent transportation systems. Recent studies have explored using diffusion models to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00725v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00725v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00725v1-abstract-full" style="display: none;"> Advancements in mobility services, navigation systems, and smart transportation technologies have made it possible to collect large amounts of path data. Modeling the distribution of this path data, known as the Path Generation (PG) problem, is crucial for understanding urban mobility patterns and developing intelligent transportation systems. Recent studies have explored using diffusion models to address the PG problem due to their ability to capture multimodal distributions and support conditional generation. A recent work devises a diffusion process explicitly in graph space and achieves state-of-the-art performance. However, this method suffers a high computation cost in terms of both time and memory, which prohibits its application. In this paper, we analyze this method both theoretically and experimentally and find that the main culprit of its high computation cost is its explicit design of the diffusion process in graph space. To improve efficiency, we devise a Latent-space Path Diffusion (LPD) model, which operates in latent space instead of graph space. Our LPD significantly reduces both time and memory costs by up to 82.8% and 83.1%, respectively. Despite these reductions, our approach does not suffer from performance degradation. It outperforms the state-of-the-art method in most scenarios by 24.5%~34.0%. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00725v1-abstract-full').style.display = 'none'; document.getElementById('2502.00725v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">16 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00712">arXiv:2502.00712</a> <span> [<a href="https://arxiv.org/pdf/2502.00712">pdf</a>, <a href="https://arxiv.org/format/2502.00712">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Registration-Enhanced Segmentation Method for Prostate Cancer in Ultrasound Images </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sang%2C+S">Shengtian Sang</a>, <a href="/search/cs?searchtype=author&query=Jahanandish%2C+H">Hassan Jahanandish</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C+X">Cynthia Xinran Li</a>, <a href="/search/cs?searchtype=author&query=Bhattachary%2C+I">Indrani Bhattachary</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J+H">Jeong Hoon Lee</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lichun Zhang</a>, <a href="/search/cs?searchtype=author&query=Vesal%2C+S">Sulaiman Vesal</a>, <a href="/search/cs?searchtype=author&query=Ghanouni%2C+P">Pejman Ghanouni</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+R">Richard Fan</a>, <a href="/search/cs?searchtype=author&query=Sonn%2C+G+A">Geoffrey A. Sonn</a>, <a href="/search/cs?searchtype=author&query=Rusu%2C+M">Mirabela Rusu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00712v1-abstract-short" style="display: inline;"> Prostate cancer is a major cause of cancer-related deaths in men, where early detection greatly improves survival rates. Although MRI-TRUS fusion biopsy offers superior accuracy by combining MRI's detailed visualization with TRUS's real-time guidance, it is a complex and time-intensive procedure that relies heavily on manual annotations, leading to potential errors. To address these challenges, we… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00712v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00712v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00712v1-abstract-full" style="display: none;"> Prostate cancer is a major cause of cancer-related deaths in men, where early detection greatly improves survival rates. Although MRI-TRUS fusion biopsy offers superior accuracy by combining MRI's detailed visualization with TRUS's real-time guidance, it is a complex and time-intensive procedure that relies heavily on manual annotations, leading to potential errors. To address these challenges, we propose a fully automatic MRI-TRUS fusion-based segmentation method that identifies prostate tumors directly in TRUS images without requiring manual annotations. Unlike traditional multimodal fusion approaches that rely on naive data concatenation, our method integrates a registration-segmentation framework to align and leverage spatial information between MRI and TRUS modalities. This alignment enhances segmentation accuracy and reduces reliance on manual effort. Our approach was validated on a dataset of 1,747 patients from Stanford Hospital, achieving an average Dice coefficient of 0.212, outperforming TRUS-only (0.117) and naive MRI-TRUS fusion (0.132) methods, with significant improvements (p $<$ 0.01). This framework demonstrates the potential for reducing the complexity of prostate cancer diagnosis and provides a flexible architecture applicable to other multimodal medical imaging tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00712v1-abstract-full').style.display = 'none'; document.getElementById('2502.00712v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00631">arXiv:2502.00631</a> <span> [<a href="https://arxiv.org/pdf/2502.00631">pdf</a>, <a href="https://arxiv.org/format/2502.00631">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> MedConv: Convolutions Beat Transformers on Long-Tailed Bone Density Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Qi%2C+X">Xuyin Qi</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zeyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Zheng%2C+H">Huazhan Zheng</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Mingxi Chen</a>, <a href="/search/cs?searchtype=author&query=Kutaiba%2C+N">Numan Kutaiba</a>, <a href="/search/cs?searchtype=author&query=Lim%2C+R">Ruth Lim</a>, <a href="/search/cs?searchtype=author&query=Chiang%2C+C">Cherie Chiang</a>, <a href="/search/cs?searchtype=author&query=Tham%2C+Z+E">Zi En Tham</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+X">Xuan Ren</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+W">Wenxin Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Hao Zhang</a>, <a href="/search/cs?searchtype=author&query=Lv%2C+W">Wenbing Lv</a>, <a href="/search/cs?searchtype=author&query=Yao%2C+G">Guangzhen Yao</a>, <a href="/search/cs?searchtype=author&query=Han%2C+R">Renda Han</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+K">Kangsheng Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+M">Mingyuan Li</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+H">Hongtao Mao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yu Li</a>, <a href="/search/cs?searchtype=author&query=Liao%2C+Z">Zhibin Liao</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Yang Zhao</a>, <a href="/search/cs?searchtype=author&query=To%2C+M">Minh-Son To</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00631v1-abstract-short" style="display: inline;"> Bone density prediction via CT scans to estimate T-scores is crucial, providing a more precise assessment of bone health compared to traditional methods like X-ray bone density tests, which lack spatial resolution and the ability to detect localized changes. However, CT-based prediction faces two major challenges: the high computational complexity of transformer-based architectures, which limits t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00631v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00631v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00631v1-abstract-full" style="display: none;"> Bone density prediction via CT scans to estimate T-scores is crucial, providing a more precise assessment of bone health compared to traditional methods like X-ray bone density tests, which lack spatial resolution and the ability to detect localized changes. However, CT-based prediction faces two major challenges: the high computational complexity of transformer-based architectures, which limits their deployment in portable and clinical settings, and the imbalanced, long-tailed distribution of real-world hospital data that skews predictions. To address these issues, we introduce MedConv, a convolutional model for bone density prediction that outperforms transformer models with lower computational demands. We also adapt Bal-CE loss and post-hoc logit adjustment to improve class balance. Extensive experiments on our AustinSpine dataset shows that our approach achieves up to 21% improvement in accuracy and 20% in ROC AUC over previous state-of-the-art methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00631v1-abstract-full').style.display = 'none'; document.getElementById('2502.00631v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00602">arXiv:2502.00602</a> <span> [<a href="https://arxiv.org/pdf/2502.00602">pdf</a>, <a href="https://arxiv.org/format/2502.00602">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Mitigating Heterogeneous Token Overfitting in LLM Knowledge Editing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tianci Liu</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+Z">Zihan Dong</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Linjun Zhang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Haoyu Wang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+J">Jing Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00602v1-abstract-short" style="display: inline;"> Large language models (LLMs) have achieved remarkable performance on various natural language tasks. However, they are trained on static corpora and their knowledge can become outdated quickly in the fast-changing world. This motivates the development of knowledge editing (KE) to update specific knowledge in LLMs without changing unrelated others or compromising their pre-trained capabilities. Pre… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00602v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00602v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00602v1-abstract-full" style="display: none;"> Large language models (LLMs) have achieved remarkable performance on various natural language tasks. However, they are trained on static corpora and their knowledge can become outdated quickly in the fast-changing world. This motivates the development of knowledge editing (KE) to update specific knowledge in LLMs without changing unrelated others or compromising their pre-trained capabilities. Previous efforts sought to update a small amount of parameters of a LLM and proved effective for making selective updates. Nonetheless, the edited LLM often exhibits degraded ability to reason about the new knowledge. In this work, we identify a key issue: heterogeneous token overfitting (HTO), where the LLM overfits different tokens in the provided knowledge at varying rates. To tackle this, we propose OVERTONE, a token-level smoothing method that mitigates HTO by adaptively refining the target distribution. Theoretically, OVERTONE offers better parameter updates with negligible computation overhead. It also induces an implicit DPO but does not require preference data pairs. Extensive experiments across four editing methods, two LLMs, and diverse scenarios demonstrate the effectiveness and versatility of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00602v1-abstract-full').style.display = 'none'; document.getElementById('2502.00602v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00560">arXiv:2502.00560</a> <span> [<a href="https://arxiv.org/pdf/2502.00560">pdf</a>, <a href="https://arxiv.org/format/2502.00560">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> </div> </div> <p class="title is-5 mathjax"> A Scalable Solver for 2p0s Differential Games with One-Sided Payoff Information and Continuous Actions, States, and Time </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ghimire%2C+M">Mukesh Ghimire</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+Z">Zhe Xu</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yi Ren</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00560v2-abstract-short" style="display: inline;"> Existing solvers for imperfect-information extensive-form games (IIEFGs) often struggle with scalability in terms of action and state space sizes and the number of time steps. However, many real-world games involve continuous action and state spaces and occur in continuous time, making them differential in nature. This paper addresses the scalability challenges for a representative class of two-pl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00560v2-abstract-full').style.display = 'inline'; document.getElementById('2502.00560v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00560v2-abstract-full" style="display: none;"> Existing solvers for imperfect-information extensive-form games (IIEFGs) often struggle with scalability in terms of action and state space sizes and the number of time steps. However, many real-world games involve continuous action and state spaces and occur in continuous time, making them differential in nature. This paper addresses the scalability challenges for a representative class of two-player zero-sum (2p0s) differential games where the informed player knows the game type (payoff) while the uninformed one only has a prior belief over the set of possible types. Such games encompass a wide range of attack-defense scenarios, where the defender adapts based on their belief about the attacker's target. We make the following contributions: (1) We show that under the Isaacs' condition, the complexity of computing the Nash equilibrium for these games is not related to the action space size; and (2) we propose a multigrid approach to effectively reduce the cost of these games when many time steps are involved. Code for this work is available at https://github.com/ghimiremukesh/cams/tree/conf_sub. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00560v2-abstract-full').style.display = 'none'; document.getElementById('2502.00560v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">32 pages, 12 figures. Fixed typo in table 1</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00366">arXiv:2502.00366</a> <span> [<a href="https://arxiv.org/pdf/2502.00366">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Prostate-Specific Foundation Models for Enhanced Detection of Clinically Significant Cancer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+J+H">Jeong Hoon Lee</a>, <a href="/search/cs?searchtype=author&query=Li%2C+C+X">Cynthia Xinran Li</a>, <a href="/search/cs?searchtype=author&query=Jahanandish%2C+H">Hassan Jahanandish</a>, <a href="/search/cs?searchtype=author&query=Bhattacharya%2C+I">Indrani Bhattacharya</a>, <a href="/search/cs?searchtype=author&query=Vesal%2C+S">Sulaiman Vesal</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lichun Zhang</a>, <a href="/search/cs?searchtype=author&query=Sang%2C+S">Shengtian Sang</a>, <a href="/search/cs?searchtype=author&query=Choi%2C+M+H">Moon Hyung Choi</a>, <a href="/search/cs?searchtype=author&query=Soerensen%2C+S+J+C">Simon John Christoph Soerensen</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+S+R">Steve Ran Zhou</a>, <a href="/search/cs?searchtype=author&query=Sommer%2C+E+R">Elijah Richard Sommer</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+R">Richard Fan</a>, <a href="/search/cs?searchtype=author&query=Ghanouni%2C+P">Pejman Ghanouni</a>, <a href="/search/cs?searchtype=author&query=Song%2C+Y">Yuze Song</a>, <a href="/search/cs?searchtype=author&query=Seibert%2C+T+M">Tyler M. Seibert</a>, <a href="/search/cs?searchtype=author&query=Sonn%2C+G+A">Geoffrey A. Sonn</a>, <a href="/search/cs?searchtype=author&query=Rusu%2C+M">Mirabela Rusu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00366v2-abstract-short" style="display: inline;"> Accurate prostate cancer diagnosis remains challenging. Even when using MRI, radiologists exhibit low specificity and significant inter-observer variability, leading to potential delays or inaccuracies in identifying clinically significant cancers. This leads to numerous unnecessary biopsies and risks of missing clinically significant cancers. Here we present prostate vision contrastive network (P… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00366v2-abstract-full').style.display = 'inline'; document.getElementById('2502.00366v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00366v2-abstract-full" style="display: none;"> Accurate prostate cancer diagnosis remains challenging. Even when using MRI, radiologists exhibit low specificity and significant inter-observer variability, leading to potential delays or inaccuracies in identifying clinically significant cancers. This leads to numerous unnecessary biopsies and risks of missing clinically significant cancers. Here we present prostate vision contrastive network (ProViCNet), prostate organ-specific vision foundation models for Magnetic Resonance Imaging (MRI) and Trans-Rectal Ultrasound imaging (TRUS) for comprehensive cancer detection. ProViCNet was trained and validated using 4,401 patients across six institutions, as a prostate cancer detection model on radiology images relying on patch-level contrastive learning guided by biopsy confirmed radiologist annotations. ProViCNet demonstrated consistent performance across multiple internal and external validation cohorts with area under the receiver operating curve values ranging from 0.875 to 0.966, significantly outperforming radiologists in the reader study (0.907 versus 0.805, p<0.001) for mpMRI, while achieving 0.670 to 0.740 for TRUS. We also integrated ProViCNet with standard PSA to develop a virtual screening test, and we showed that we can maintain the high sensitivity for detecting clinically significant cancers while more than doubling specificity from 15% to 38% (p<0.001), thereby substantially reducing unnecessary biopsies. These findings highlight that ProViCNet's potential for enhancing prostate cancer diagnosis accuracy and reduce unnecessary biopsies, thereby optimizing diagnostic pathways. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00366v2-abstract-full').style.display = 'none'; document.getElementById('2502.00366v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">44pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00352">arXiv:2502.00352</a> <span> [<a href="https://arxiv.org/pdf/2502.00352">pdf</a>, <a href="https://arxiv.org/format/2502.00352">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> A Differentiated Reward Method for Reinforcement Learning based Multi-Vehicle Cooperative Decision-Making Algorithms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Han%2C+Y">Ye Han</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lijun Zhang</a>, <a href="/search/cs?searchtype=author&query=Meng%2C+D">Dejian Meng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00352v1-abstract-short" style="display: inline;"> Reinforcement learning (RL) shows great potential for optimizing multi-vehicle cooperative driving strategies through the state-action-reward feedback loop, but it still faces challenges such as low sample efficiency. This paper proposes a differentiated reward method based on steady-state transition systems, which incorporates state transition gradient information into the reward design by analyz… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00352v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00352v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00352v1-abstract-full" style="display: none;"> Reinforcement learning (RL) shows great potential for optimizing multi-vehicle cooperative driving strategies through the state-action-reward feedback loop, but it still faces challenges such as low sample efficiency. This paper proposes a differentiated reward method based on steady-state transition systems, which incorporates state transition gradient information into the reward design by analyzing traffic flow characteristics, aiming to optimize action selection and policy learning in multi-vehicle cooperative decision-making. The performance of the proposed method is validated in RL algorithms such as MAPPO, MADQN, and QMIX under varying autonomous vehicle penetration. The results show that the differentiated reward method significantly accelerates training convergence and outperforms centering reward and others in terms of traffic efficiency, safety, and action rationality. Additionally, the method demonstrates strong scalability and environmental adaptability, providing a novel approach for multi-agent cooperative decision-making in complex traffic scenarios. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00352v1-abstract-full').style.display = 'none'; document.getElementById('2502.00352v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 3 figures, submitted to IEEE IV 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00345">arXiv:2502.00345</a> <span> [<a href="https://arxiv.org/pdf/2502.00345">pdf</a>, <a href="https://arxiv.org/format/2502.00345">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> </div> </div> <p class="title is-5 mathjax"> The Composite Task Challenge for Cooperative Multi-Agent Reinforcement Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yurui Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yuxuan Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Li Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+S">Shijian Li</a>, <a href="/search/cs?searchtype=author&query=Pan%2C+G">Gang Pan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00345v1-abstract-short" style="display: inline;"> The significant role of division of labor (DOL) in promoting cooperation is widely recognized in real-world applications.Many cooperative multi-agent reinforcement learning (MARL) methods have incorporated the concept of DOL to improve cooperation among agents.However, the tasks used in existing testbeds typically correspond to tasks where DOL is often not a necessary feature for achieving optimal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00345v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00345v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00345v1-abstract-full" style="display: none;"> The significant role of division of labor (DOL) in promoting cooperation is widely recognized in real-world applications.Many cooperative multi-agent reinforcement learning (MARL) methods have incorporated the concept of DOL to improve cooperation among agents.However, the tasks used in existing testbeds typically correspond to tasks where DOL is often not a necessary feature for achieving optimal policies.Additionally, the full utilize of DOL concept in MARL methods remains unrealized due to the absence of appropriate tasks.To enhance the generality and applicability of MARL methods in real-world scenarios, there is a necessary to develop tasks that demand multi-agent DOL and cooperation.In this paper, we propose a series of tasks designed to meet these requirements, drawing on real-world rules as the guidance for their design.We guarantee that DOL and cooperation are necessary condition for completing tasks and introduce three factors to expand the diversity of proposed tasks to cover more realistic situations.We evaluate 10 cooperative MARL methods on the proposed tasks.The results indicate that all baselines perform poorly on these tasks.To further validate the solvability of these tasks, we also propose simplified variants of proposed tasks.Experimental results show that baselines are able to handle these simplified variants, providing evidence of the solvability of the proposed tasks.The source files is available at https://github.com/Yurui-Li/CTC. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00345v1-abstract-full').style.display = 'none'; document.getElementById('2502.00345v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00198">arXiv:2502.00198</a> <span> [<a href="https://arxiv.org/pdf/2502.00198">pdf</a>, <a href="https://arxiv.org/format/2502.00198">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Science and Game Theory">cs.GT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Fairshare Data Pricing for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Luyang Zhang</a>, <a href="/search/cs?searchtype=author&query=Jiao%2C+C">Cathy Jiao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Beibei Li</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+C">Chenyan Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00198v1-abstract-short" style="display: inline;"> Training data is a pivotal resource for building large language models (LLMs), but unfair pricing in data markets poses a serious challenge for both data buyers (e.g., LLM builders) and sellers (e.g., human annotators), which discourages market participation, reducing data quantity and quality. In this paper, we propose a fairshare pricing framework that sets training data prices using data valuat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00198v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00198v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00198v1-abstract-full" style="display: none;"> Training data is a pivotal resource for building large language models (LLMs), but unfair pricing in data markets poses a serious challenge for both data buyers (e.g., LLM builders) and sellers (e.g., human annotators), which discourages market participation, reducing data quantity and quality. In this paper, we propose a fairshare pricing framework that sets training data prices using data valuation methods to quantify their contribution to LLMs. In our framework, buyers make purchasing decisions using data valuation and sellers set prices to maximize their profits based on the anticipated buyer purchases. We theoretically show that pricing derived from our framework is tightly linked to data valuation and buyers' budget, optimal for both buyers and sellers. Through market simulations using current LLMs and datasets (math problems, medical diagnosis, and physical reasoning), we show that our framework is fairshare for buyers by ensuring their purchased data is reflective of model training value, leading to higher LLM task performances per-dollar spent on data, and fairshare for sellers by ensuring they sell their data at optimal prices. Our framework lays the foundation for future research on equitable and sustainable data markets for large-scale AI. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00198v1-abstract-full').style.display = 'none'; document.getElementById('2502.00198v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00162">arXiv:2502.00162</a> <span> [<a href="https://arxiv.org/pdf/2502.00162">pdf</a>, <a href="https://arxiv.org/format/2502.00162">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Physics-informed Split Koopman Operators for Data-efficient Soft Robotic Simulation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ristich%2C+E">Eron Ristich</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lei Zhang</a>, <a href="/search/cs?searchtype=author&query=Ren%2C+Y">Yi Ren</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+J">Jiefeng Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00162v1-abstract-short" style="display: inline;"> Koopman operator theory provides a powerful data-driven technique for modeling nonlinear dynamical systems in a linear framework, in comparison to computationally expensive and highly nonlinear physics-based simulations. However, Koopman operator-based models for soft robots are very high dimensional and require considerable amounts of data to properly resolve. Inspired by physics-informed techniq… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00162v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00162v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00162v1-abstract-full" style="display: none;"> Koopman operator theory provides a powerful data-driven technique for modeling nonlinear dynamical systems in a linear framework, in comparison to computationally expensive and highly nonlinear physics-based simulations. However, Koopman operator-based models for soft robots are very high dimensional and require considerable amounts of data to properly resolve. Inspired by physics-informed techniques from machine learning, we present a novel physics-informed Koopman operator identification method that improves simulation accuracy for small dataset sizes. Through Strang splitting, the method takes advantage of both continuous and discrete Koopman operator approximation to obtain information both from trajectory and phase space data. The method is validated on a tendon-driven soft robotic arm, showing orders of magnitude improvement over standard methods in terms of the shape error. We envision this method can significantly reduce the data requirement of Koopman operators for systems with partially known physical models, and thus reduce the cost of obtaining data. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00162v1-abstract-full').style.display = 'none'; document.getElementById('2502.00162v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This work has been submitted to the IEEE for possible publication. Submitted to ICRA 2025 for review</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.00028">arXiv:2502.00028</a> <span> [<a href="https://arxiv.org/pdf/2502.00028">pdf</a>, <a href="https://arxiv.org/format/2502.00028">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> VRank: Enhancing Verilog Code Generation from Large Language Models via Self-Consistency </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhuorui Zhao</a>, <a href="/search/cs?searchtype=author&query=Qiu%2C+R">Ruidi Qiu</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+I">Ing-Chao Lin</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+G+L">Grace Li Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+B">Bing Li</a>, <a href="/search/cs?searchtype=author&query=Schlichtmann%2C+U">Ulf Schlichtmann</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.00028v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) have demonstrated promising capabilities in generating Verilog code from module specifications. To improve the quality of such generated Verilog codes, previous methods require either time-consuming manual inspection or generation of multiple Verilog codes, from which the one with the highest quality is selected with manually designed testbenches. To enhance the genera… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00028v1-abstract-full').style.display = 'inline'; document.getElementById('2502.00028v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.00028v1-abstract-full" style="display: none;"> Large Language Models (LLMs) have demonstrated promising capabilities in generating Verilog code from module specifications. To improve the quality of such generated Verilog codes, previous methods require either time-consuming manual inspection or generation of multiple Verilog codes, from which the one with the highest quality is selected with manually designed testbenches. To enhance the generation efficiency while maintaining the quality of the generated codes, we propose VRank, an automatic framework that generates Verilog codes with LLMs. In our framework, multiple code candidates are generated with LLMs by leveraging their probabilistic nature. Afterwards, we group Verilog code candidates into clusters based on identical outputs when tested against the same testbench, which is also generated by LLMs. Clusters are ranked based on the consistency they show on testbench. To determine the best candidate, Chain-of-Thought is further applied to select the best candidate from the top-ranked clusters. By systematically analyzing diverse outputs of generated codes, VRank reduces errors and enhances the overall quality of the generated Verilog code. Experimental results on the VerilogEval-Human benchmark demonstrate a significant 10.5% average increase in functional correctness (passl1) across multiple LLMs, demonstrating VRank's effectiveness in improving the accuracy of automated hardware description language generation for complex design tasks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.00028v1-abstract-full').style.display = 'none'; document.getElementById('2502.00028v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">accepted by ISQED2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.19358">arXiv:2501.19358</a> <span> [<a href="https://arxiv.org/pdf/2501.19358">pdf</a>, <a href="https://arxiv.org/format/2501.19358">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> The Energy Loss Phenomenon in RLHF: A New Perspective on Mitigating Reward Hacking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Miao%2C+Y">Yuchun Miao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+S">Sen Zhang</a>, <a href="/search/cs?searchtype=author&query=Ding%2C+L">Liang Ding</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yuqi Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lefei Zhang</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+D">Dacheng Tao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.19358v2-abstract-short" style="display: inline;"> This work identifies the Energy Loss Phenomenon in Reinforcement Learning from Human Feedback (RLHF) and its connection to reward hacking. Specifically, energy loss in the final layer of a Large Language Model (LLM) gradually increases during the RL process, with an excessive increase in energy loss characterizing reward hacking. Beyond empirical analysis, we further provide a theoretical foundati… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19358v2-abstract-full').style.display = 'inline'; document.getElementById('2501.19358v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.19358v2-abstract-full" style="display: none;"> This work identifies the Energy Loss Phenomenon in Reinforcement Learning from Human Feedback (RLHF) and its connection to reward hacking. Specifically, energy loss in the final layer of a Large Language Model (LLM) gradually increases during the RL process, with an excessive increase in energy loss characterizing reward hacking. Beyond empirical analysis, we further provide a theoretical foundation by proving that, under mild conditions, the increased energy loss reduces the upper bound of contextual relevance in LLMs, which is a critical aspect of reward hacking as the reduced contextual relevance typically indicates overfitting to reward model-favored patterns in RL. To address this issue, we propose an Energy loss-aware PPO algorithm (EPPO) which penalizes the increase in energy loss in the LLM's final layer during reward calculation to prevent excessive energy loss, thereby mitigating reward hacking. We theoretically show that EPPO can be conceptually interpreted as an entropy-regularized RL algorithm, which provides deeper insights into its effectiveness. Extensive experiments across various LLMs and tasks demonstrate the commonality of the energy loss phenomenon, as well as the effectiveness of EPPO in mitigating reward hacking and improving RLHF performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.19358v2-abstract-full').style.display = 'none'; document.getElementById('2501.19358v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">28 pages, 21 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18736">arXiv:2501.18736</a> <span> [<a href="https://arxiv.org/pdf/2501.18736">pdf</a>, <a href="https://arxiv.org/format/2501.18736">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Distillation-Driven Diffusion Model for Multi-Scale MRI Super-Resolution: Make 1.5T MRI Great Again </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhe Wang</a>, <a href="/search/cs?searchtype=author&query=Ru%2C+Y">Yuhua Ru</a>, <a href="/search/cs?searchtype=author&query=Bauer%2C+F">Fabian Bauer</a>, <a href="/search/cs?searchtype=author&query=Chetouani%2C+A">Aladine Chetouani</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+F">Fang Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Liping Zhang</a>, <a href="/search/cs?searchtype=author&query=Hans%2C+D">Didier Hans</a>, <a href="/search/cs?searchtype=author&query=Jennane%2C+R">Rachid Jennane</a>, <a href="/search/cs?searchtype=author&query=Jarraya%2C+M">Mohamed Jarraya</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y+H">Yung Hsin Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18736v1-abstract-short" style="display: inline;"> Magnetic Resonance Imaging (MRI) offers critical insights into microstructural details, however, the spatial resolution of standard 1.5T imaging systems is often limited. In contrast, 7T MRI provides significantly enhanced spatial resolution, enabling finer visualization of anatomical structures. Though this, the high cost and limited availability of 7T MRI hinder its widespread use in clinical se… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18736v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18736v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18736v1-abstract-full" style="display: none;"> Magnetic Resonance Imaging (MRI) offers critical insights into microstructural details, however, the spatial resolution of standard 1.5T imaging systems is often limited. In contrast, 7T MRI provides significantly enhanced spatial resolution, enabling finer visualization of anatomical structures. Though this, the high cost and limited availability of 7T MRI hinder its widespread use in clinical settings. To address this challenge, a novel Super-Resolution (SR) model is proposed to generate 7T-like MRI from standard 1.5T MRI scans. Our approach leverages a diffusion-based architecture, incorporating gradient nonlinearity correction and bias field correction data from 7T imaging as guidance. Moreover, to improve deployability, a progressive distillation strategy is introduced. Specifically, the student model refines the 7T SR task with steps, leveraging feature maps from the inference phase of the teacher model as guidance, aiming to allow the student model to achieve progressively 7T SR performance with a smaller, deployable model size. Experimental results demonstrate that our baseline teacher model achieves state-of-the-art SR performance. The student model, while lightweight, sacrifices minimal performance. Furthermore, the student model is capable of accepting MRI inputs at varying resolutions without the need for retraining, significantly further enhancing deployment flexibility. The clinical relevance of our proposed method is validated using clinical data from Massachusetts General Hospital. Our code is available at https://github.com/ZWang78/SR. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18736v1-abstract-full').style.display = 'none'; document.getElementById('2501.18736v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.18500">arXiv:2501.18500</a> <span> [<a href="https://arxiv.org/pdf/2501.18500">pdf</a>, <a href="https://arxiv.org/format/2501.18500">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> HSRMamba: Contextual Spatial-Spectral State Space Model for Single Hyperspectral Super-Resolution </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shi Chen</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lefei Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Liangpei Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.18500v1-abstract-short" style="display: inline;"> Mamba has demonstrated exceptional performance in visual tasks due to its powerful global modeling capabilities and linear computational complexity, offering considerable potential in hyperspectral image super-resolution (HSISR). However, in HSISR, Mamba faces challenges as transforming images into 1D sequences neglects the spatial-spectral structural relationships between locally adjacent pixels,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18500v1-abstract-full').style.display = 'inline'; document.getElementById('2501.18500v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.18500v1-abstract-full" style="display: none;"> Mamba has demonstrated exceptional performance in visual tasks due to its powerful global modeling capabilities and linear computational complexity, offering considerable potential in hyperspectral image super-resolution (HSISR). However, in HSISR, Mamba faces challenges as transforming images into 1D sequences neglects the spatial-spectral structural relationships between locally adjacent pixels, and its performance is highly sensitive to input order, which affects the restoration of both spatial and spectral details. In this paper, we propose HSRMamba, a contextual spatial-spectral modeling state space model for HSISR, to address these issues both locally and globally. Specifically, a local spatial-spectral partitioning mechanism is designed to establish patch-wise causal relationships among adjacent pixels in 3D features, mitigating the local forgetting issue. Furthermore, a global spectral reordering strategy based on spectral similarity is employed to enhance the causal representation of similar pixels across both spatial and spectral dimensions. Finally, experimental results demonstrate our HSRMamba outperforms the state-of-the-art methods in quantitative quality and visual results. Code will be available soon. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.18500v1-abstract-full').style.display = 'none'; document.getElementById('2501.18500v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17286">arXiv:2501.17286</a> <span> [<a href="https://arxiv.org/pdf/2501.17286">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Medical Physics">physics.med-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Fine-Tuning Open-Source Large Language Models to Improve Their Performance on Radiation Oncology Tasks: A Feasibility Study to Investigate Their Potential Clinical Applications in Radiation Oncology </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Wang%2C+P">Peilong Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhengliang Liu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yiwei Li</a>, <a href="/search/cs?searchtype=author&query=Holmes%2C+J">Jason Holmes</a>, <a href="/search/cs?searchtype=author&query=Shu%2C+P">Peng Shu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lian Zhang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xiang Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Q">Quanzheng Li</a>, <a href="/search/cs?searchtype=author&query=Laughlin%2C+B+S">Brady S. Laughlin</a>, <a href="/search/cs?searchtype=author&query=Toesca%2C+D+S">Diego Santos Toesca</a>, <a href="/search/cs?searchtype=author&query=Vora%2C+S+A">Sujay A. Vora</a>, <a href="/search/cs?searchtype=author&query=Patel%2C+S+H">Samir H. Patel</a>, <a href="/search/cs?searchtype=author&query=Sio%2C+T+T">Terence T. Sio</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tianming Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wei Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17286v1-abstract-short" style="display: inline;"> Background: The radiation oncology clinical practice involves many steps relying on the dynamic interplay of abundant text data. Large language models have displayed remarkable capabilities in processing complex text information. But their direct applications in specific fields like radiation oncology remain underexplored. Purpose: This study aims to investigate whether fine-tuning LLMs with dom… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17286v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17286v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17286v1-abstract-full" style="display: none;"> Background: The radiation oncology clinical practice involves many steps relying on the dynamic interplay of abundant text data. Large language models have displayed remarkable capabilities in processing complex text information. But their direct applications in specific fields like radiation oncology remain underexplored. Purpose: This study aims to investigate whether fine-tuning LLMs with domain knowledge can improve the performance on Task (1) treatment regimen generation, Task (2) treatment modality selection (photon, proton, electron, or brachytherapy), and Task (3) ICD-10 code prediction in radiation oncology. Methods: Data for 15,724 patient cases were extracted. Cases where patients had a single diagnostic record, and a clearly identifiable primary treatment plan were selected for preprocessing and manual annotation to have 7,903 cases of the patient diagnosis, treatment plan, treatment modality, and ICD-10 code. Each case was used to construct a pair consisting of patient diagnostics details and an answer (treatment regimen, treatment modality, or ICD-10 code respectively) for the supervised fine-tuning of these three tasks. Open source LLaMA2-7B and Mistral-7B models were utilized for the fine-tuning with the Low-Rank Approximations method. Accuracy and ROUGE-1 score were reported for the fine-tuned models and original models. Clinical evaluation was performed on Task (1) by radiation oncologists, while precision, recall, and F-1 score were evaluated for Task (2) and (3). One-sided Wilcoxon signed-rank tests were used to statistically analyze the results. Results: Fine-tuned LLMs outperformed original LLMs across all tasks with p-value <= 0.001. Clinical evaluation demonstrated that over 60% of the fine-tuned LLMs-generated treatment regimens were clinically acceptable. Precision, recall, and F1-score showed improved performance of fine-tuned LLMs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17286v1-abstract-full').style.display = 'none'; document.getElementById('2501.17286v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16671">arXiv:2501.16671</a> <span> [<a href="https://arxiv.org/pdf/2501.16671">pdf</a>, <a href="https://arxiv.org/format/2501.16671">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Data-Free Model-Related Attacks: Unleashing the Potential of Generative AI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ye%2C+D">Dayong Ye</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+T">Tianqing Zhu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shang Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+B">Bo Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L+Y">Leo Yu Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+W">Wanlei Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16671v1-abstract-short" style="display: inline;"> Generative AI technology has become increasingly integrated into our daily lives, offering powerful capabilities to enhance productivity. However, these same capabilities can be exploited by adversaries for malicious purposes. While existing research on adversarial applications of generative AI predominantly focuses on cyberattacks, less attention has been given to attacks targeting deep learning… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16671v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16671v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16671v1-abstract-full" style="display: none;"> Generative AI technology has become increasingly integrated into our daily lives, offering powerful capabilities to enhance productivity. However, these same capabilities can be exploited by adversaries for malicious purposes. While existing research on adversarial applications of generative AI predominantly focuses on cyberattacks, less attention has been given to attacks targeting deep learning models. In this paper, we introduce the use of generative AI for facilitating model-related attacks, including model extraction, membership inference, and model inversion. Our study reveals that adversaries can launch a variety of model-related attacks against both image and text models in a data-free and black-box manner, achieving comparable performance to baseline methods that have access to the target models' training data and parameters in a white-box manner. This research serves as an important early warning to the community about the potential risks associated with generative AI-powered attacks on deep learning models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16671v1-abstract-full').style.display = 'none'; document.getElementById('2501.16671v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at USENIX Security 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16663">arXiv:2501.16663</a> <span> [<a href="https://arxiv.org/pdf/2501.16663">pdf</a>, <a href="https://arxiv.org/format/2501.16663">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Data Duplication: A Novel Multi-Purpose Attack Paradigm in Machine Unlearning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ye%2C+D">Dayong Ye</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+T">Tainqing Zhu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jiayang Li</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+K">Kun Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+B">Bo Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L+Y">Leo Yu Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+W">Wanlei Zhou</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+Y">Yang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16663v1-abstract-short" style="display: inline;"> Duplication is a prevalent issue within datasets. Existing research has demonstrated that the presence of duplicated data in training datasets can significantly influence both model performance and data privacy. However, the impact of data duplication on the unlearning process remains largely unexplored. This paper addresses this gap by pioneering a comprehensive investigation into the role of dat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16663v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16663v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16663v1-abstract-full" style="display: none;"> Duplication is a prevalent issue within datasets. Existing research has demonstrated that the presence of duplicated data in training datasets can significantly influence both model performance and data privacy. However, the impact of data duplication on the unlearning process remains largely unexplored. This paper addresses this gap by pioneering a comprehensive investigation into the role of data duplication, not only in standard machine unlearning but also in federated and reinforcement unlearning paradigms. Specifically, we propose an adversary who duplicates a subset of the target model's training set and incorporates it into the training set. After training, the adversary requests the model owner to unlearn this duplicated subset, and analyzes the impact on the unlearned model. For example, the adversary can challenge the model owner by revealing that, despite efforts to unlearn it, the influence of the duplicated subset remains in the model. Moreover, to circumvent detection by de-duplication techniques, we propose three novel near-duplication methods for the adversary, each tailored to a specific unlearning paradigm. We then examine their impacts on the unlearning process when de-duplication techniques are applied. Our findings reveal several crucial insights: 1) the gold standard unlearning method, retraining from scratch, fails to effectively conduct unlearning under certain conditions; 2) unlearning duplicated data can lead to significant model degradation in specific scenarios; and 3) meticulously crafted duplicates can evade detection by de-duplication methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16663v1-abstract-full').style.display = 'none'; document.getElementById('2501.16663v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at USENIX Security 2025</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16409">arXiv:2501.16409</a> <span> [<a href="https://arxiv.org/pdf/2501.16409">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Neurons and Cognition">q-bio.NC</span> </div> </div> <p class="title is-5 mathjax"> Classification of Mild Cognitive Impairment Based on Dynamic Functional Connectivity Using Spatio-Temporal Transformer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jing Zhang</a>, <a href="/search/cs?searchtype=author&query=Lyu%2C+Y">Yanjun Lyu</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+X">Xiaowei Yu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lu Zhang</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+C">Chao Cao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+T">Tong Chen</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+M">Minheng Chen</a>, <a href="/search/cs?searchtype=author&query=Zhuang%2C+Y">Yan Zhuang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+T">Tianming Liu</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+D">Dajiang Zhu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16409v1-abstract-short" style="display: inline;"> Dynamic functional connectivity (dFC) using resting-state functional magnetic resonance imaging (rs-fMRI) is an advanced technique for capturing the dynamic changes of neural activities, and can be very useful in the studies of brain diseases such as Alzheimer's disease (AD). Yet, existing studies have not fully leveraged the sequential information embedded within dFC that can potentially provide… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16409v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16409v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16409v1-abstract-full" style="display: none;"> Dynamic functional connectivity (dFC) using resting-state functional magnetic resonance imaging (rs-fMRI) is an advanced technique for capturing the dynamic changes of neural activities, and can be very useful in the studies of brain diseases such as Alzheimer's disease (AD). Yet, existing studies have not fully leveraged the sequential information embedded within dFC that can potentially provide valuable information when identifying brain conditions. In this paper, we propose a novel framework that jointly learns the embedding of both spatial and temporal information within dFC based on the transformer architecture. Specifically, we first construct dFC networks from rs-fMRI data through a sliding window strategy. Then, we simultaneously employ a temporal block and a spatial block to capture higher-order representations of dynamic spatio-temporal dependencies, via mapping them into an efficient fused feature representation. To further enhance the robustness of these feature representations by reducing the dependency on labeled data, we also introduce a contrastive learning strategy to manipulate different brain states. Experimental results on 345 subjects with 570 scans from the Alzheimer's Disease Neuroimaging Initiative (ADNI) demonstrate the superiority of our proposed method for MCI (Mild Cognitive Impairment, the prodromal stage of AD) prediction, highlighting its potential for early identification of AD. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16409v1-abstract-full').style.display = 'none'; document.getElementById('2501.16409v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.16393">arXiv:2501.16393</a> <span> [<a href="https://arxiv.org/pdf/2501.16393">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Improving Network Threat Detection by Knowledge Graph, Large Language Model, and Imbalanced Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+L">Lili Zhang</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Q">Quanyan Zhu</a>, <a href="/search/cs?searchtype=author&query=Ray%2C+H">Herman Ray</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Y">Ying Xie</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.16393v1-abstract-short" style="display: inline;"> Network threat detection has been challenging due to the complexities of attack activities and the limitation of historical threat data to learn from. To help enhance the existing practices of using analytics, machine learning, and artificial intelligence methods to detect the network threats, we propose an integrated modelling framework, where Knowledge Graph is used to analyze the users' activit… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16393v1-abstract-full').style.display = 'inline'; document.getElementById('2501.16393v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.16393v1-abstract-full" style="display: none;"> Network threat detection has been challenging due to the complexities of attack activities and the limitation of historical threat data to learn from. To help enhance the existing practices of using analytics, machine learning, and artificial intelligence methods to detect the network threats, we propose an integrated modelling framework, where Knowledge Graph is used to analyze the users' activity patterns, Imbalanced Learning techniques are used to prune and weigh Knowledge Graph, and LLM is used to retrieve and interpret the users' activities from Knowledge Graph. The proposed framework is applied to Agile Threat Detection through Online Sequential Learning. The preliminary results show the improved threat capture rate by 3%-4% and the increased interpretabilities of risk predictions based on the users' activities. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.16393v1-abstract-full').style.display = 'none'; document.getElementById('2501.16393v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by "Combining AI and OR/MS for Better Trustworthy Decision Making" Bridge Program co-organized by AAAI and INFORMS as poster and demo</span> </p> </li> </ol> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&query=Zhang%2C+L&start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&query=Zhang%2C+L&start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+L&start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+L&start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+L&start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&query=Zhang%2C+L&start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">…</span></li> </ul> </nav> <div class="is-hidden-tablet">  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>  </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary">  <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>  </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Search | arXiv e-print repository