Search | arXiv e-print repository
Showing 1–50 of 1,303 results for author: Li, R

Searching in archive cs, sorted by announcement date (newest first), 50 results per page.
1. arXiv:2502.09858 [pdf, other] (cs.LG, cs.AI, cs.CL, q-bio.QM)
   Automated Hypothesis Validation with Agentic Sequential Falsifications
   Authors: Kexin Huang, Ying Jin, Ryan Li, Michael Y. Li, Emmanuel Candès, Jure Leskovec
   Abstract: Hypotheses are central to information acquisition, decision-making, and discovery. However, many real-world hypotheses are abstract, high-level statements that are difficult to validate directly. This challenge is further intensified by the rise of hypothesis generation from Large Language Models (LLMs), which are prone to hallucination and produce hypotheses in volumes that make manual validation impractical. Here we propose Popper, an agentic framework for rigorous automated validation of free-form hypotheses. Guided by Karl Popper's principle of falsification, Popper validates a hypothesis using LLM agents that design and execute falsification experiments targeting its measurable implications. A novel sequential testing framework ensures strict Type-I error control while actively gathering evidence from diverse observations, whether drawn from existing data or newly conducted procedures. We demonstrate Popper on six domains including biology, economics, and sociology. Popper delivers robust error control, high power, and scalability. Furthermore, compared to human scientists, Popper achieved comparable performance in validating complex biological hypotheses while reducing time tenfold, providing a scalable, rigorous solution for hypothesis validation.
   Submitted 13 February, 2025; originally announced February 2025.
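The sequential testing idea in the Popper abstract above (accumulate evidence across falsification experiments while keeping strict Type-I error control at any stopping time) is the territory of anytime-valid tests. The sketch below is a generic e-value construction under an assumed stream of independent per-experiment p-values; the names are illustrative, not the paper's actual procedure.

    import random

    ALPHA = 0.1  # target Type-I error level

    def p_to_e(p: float, kappa: float = 0.5) -> float:
        # Calibrate a p-value into an e-value: e = kappa * p**(kappa - 1).
        # For any kappa in (0, 1) this satisfies E[e] <= 1 under the null,
        # which is what makes the running product a valid test.
        return kappa * p ** (kappa - 1)

    def sequential_falsification(p_values, alpha: float = ALPHA) -> str:
        # Multiply e-values across experiments; by Ville's inequality,
        # rejecting once the product reaches 1/alpha caps the Type-I
        # error at alpha no matter when we stop.
        e_product = 1.0
        for i, p in enumerate(p_values, 1):
            e_product *= p_to_e(p)
            if e_product >= 1.0 / alpha:
                return f"null rejected after {i} experiments (e = {e_product:.2f})"
        return f"not rejected (e = {e_product:.2f})"

    # Toy run: p-values skewed toward zero, as if the null is false.
    random.seed(0)
    print(sequential_falsification(random.random() ** 2 for _ in range(20)))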
2. arXiv:2502.09846 [pdf, other] (cs.MA)
   Robust Event-Triggered Integrated Communication and Control with Graph Information Bottleneck Optimization
   Authors: Ziqiong Wang, Xiaoxue Yu, Rongpeng Li, Zhifeng Zhao
   Abstract: Integrated communication and control serves as a critical ingredient in Multi-Agent Reinforcement Learning. However, partial observability impairs collaboration effectiveness, and a potential solution is to establish consensus through well-calibrated latent variables obtained from neighboring agents. Nevertheless, the rigid transmission of less informative content can still result in redundant information exchanges. Therefore, we propose a Consensus-Driven Event-Based Graph Information Bottleneck (CDE-GIB) method, which integrates the communication graph and information flow through a GIB regularizer to extract more concise message representations while avoiding the high computational complexity of inner-loop operations. To further minimize the communication volume required for establishing consensus during interactions, we also develop a variable-threshold event-triggering mechanism. By simultaneously considering historical data and current observations, this mechanism evaluates the importance of information to determine whether an event should be triggered. Experimental results demonstrate that our proposed method outperforms existing state-of-the-art methods in terms of both efficiency and adaptability.
   Submitted 13 February, 2025; originally announced February 2025.
3. arXiv:2502.09723 [pdf, other] (cs.CR, cs.AI, cs.CL)
   Making Them a Malicious Database: Exploiting Query Code to Jailbreak Aligned Large Language Models
   Authors: Qingsong Zou, Jingyu Xiao, Qing Li, Zhi Yan, Yuhang Wang, Li Xu, Wenxuan Wang, Kuofeng Gao, Ruoyu Li, Yong Jiang
   Abstract: Recent advances in large language models (LLMs) have demonstrated remarkable potential in the field of natural language processing. Unfortunately, LLMs face significant security and ethical risks. Although techniques such as safety alignment have been developed for defense, prior research reveals the possibility of bypassing such defenses through well-designed jailbreak attacks. In this paper, we propose QueryAttack, a novel framework to systematically examine the generalizability of safety alignment. By treating LLMs as knowledge databases, we translate malicious queries in natural language into code-style structured queries to bypass the safety alignment mechanisms of LLMs. We conduct extensive experiments on mainstream LLMs, and the results show that QueryAttack achieves high attack success rates (ASRs) across LLMs with different developers and capabilities. We also evaluate QueryAttack's performance against common defenses, confirming that it is difficult to mitigate with general defensive techniques. To defend against QueryAttack, we tailor a defense method which can reduce the ASR by up to 64% on GPT-4-1106. The code of QueryAttack can be found at https://anonymous.4open.science/r/QueryAttack-334B.
   Submitted 13 February, 2025; originally announced February 2025.
   Comments: 15 pages, 11 figures

4. arXiv:2502.09247 [pdf, other] (cs.CL, cs.AI)
   The Joint Entity-Relation Extraction Model Based on Span and Interactive Fusion Representation for Chinese Medical Texts with Complex Semantics
   Authors: Danni Feng, Runzhi Li, Jing Wang, Siyu Yan, Lihong Ma, Yunli Xing
   Abstract: Joint entity-relation extraction is a critical task in transforming unstructured or semi-structured text into triplets, facilitating the construction of large-scale knowledge graphs, and supporting various downstream applications. Despite its importance, research on Chinese text, particularly with complex semantics in specialized domains like medicine, remains limited. To address this gap, we introduce CH-DDI, a Chinese drug-drug interaction dataset designed to capture the intricacies of medical text. Leveraging the strengths of attention mechanisms in capturing long-range dependencies, we propose the SEA module, which enhances the extraction of complex contextual semantic information, thereby improving entity recognition and relation extraction. Additionally, to address the inefficiencies of existing methods in facilitating information exchange between entity recognition and relation extraction, we present an interactive fusion representation module. This module employs cross attention for bidirectional information exchange between the tasks and further refines feature extraction through BiLSTM. Experimental results on both our CH-DDI dataset and the public CoNLL04 dataset demonstrate that our model exhibits strong generalization capabilities. On the CH-DDI dataset, our model achieves an F1-score of 96.73% for entity recognition and 78.43% for relation extraction. On the CoNLL04 dataset, it attains an entity recognition precision of 89.54% and a relation extraction accuracy of 71.64%.
   Submitted 13 February, 2025; originally announced February 2025.
5. arXiv:2502.09073 [pdf, other] (cs.CL)
   Enhancing RAG with Active Learning on Conversation Records: Reject Incapables and Answer Capables
   Authors: Xuzhao Geng, Haozhao Wang, Jun Wang, Wei Liu, Ruixuan Li
   Abstract: Retrieval-augmented generation (RAG) is a key technique for leveraging external knowledge and reducing hallucinations in large language models (LLMs). However, RAG still struggles to fully prevent hallucinated responses. To address this, it is essential to identify samples prone to hallucination or guide LLMs toward correct responses, which experts then annotate to develop high-quality datasets for refining LLMs. However, the growing scarcity of such datasets makes their creation challenging. This paper proposes using the vast amount of conversations from widespread LLM usage to build these datasets, training LLMs to avoid hallucination-prone questions while accurately responding to manageable ones. Given the impracticality of expert-annotating all conversation records, the paper introduces AL4RAG, which uses active learning to select the most suitable conversation samples for annotation, optimizing performance within an annotation budget. Additionally, recognizing that traditional active learning methods are not fully compatible with RAG due to unsuitable distance metrics, we develop a novel sample distance measurement for RAG active learning. Extensive experiments show that our method consistently outperforms baselines across multiple metrics.
   Submitted 13 February, 2025; originally announced February 2025.
6. arXiv:2502.06834 [pdf, other] (cs.LG, cs.AI)
   A Unified Knowledge-Distillation and Semi-Supervised Learning Framework to Improve Industrial Ads Delivery Systems
   Authors: Hamid Eghbalzadeh, Yang Wang, Rui Li, Yuji Mo, Qin Ding, Jiaxiang Fu, Liang Dai, Shuo Gu, Nima Noorshams, Sem Park, Bo Long, Xue Feng
   Abstract: Industrial ads ranking systems conventionally rely on labeled impression data, which leads to challenges such as overfitting, slower incremental gain from model scaling, and biases due to discrepancies between training and serving data. To overcome these issues, we propose a Unified framework for Knowledge-Distillation and Semi-supervised Learning (UKDSL) for ads ranking, empowering the training of models on significantly larger and more diverse datasets, thereby reducing overfitting and mitigating training-serving data discrepancies. We provide detailed formal analysis and numerical simulations of the inherent miscalibration and prediction bias of multi-stage ranking systems, and show empirical evidence of the proposed framework's capability to mitigate them. Compared to prior work, UKDSL enables models to learn from a much larger set of unlabeled data, improving performance while remaining computationally efficient. Finally, we report the successful deployment of UKDSL in an industrial setting across various ranking models serving users at multi-billion scale, across various surfaces, geographic locations, and clients, optimizing for various events; to the best of our knowledge, this is the first deployment of its kind in terms of the scale and efficiency at which it operates.
   Submitted 5 February, 2025; originally announced February 2025.
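The combination named in the UKDSL title can be written down in its conventional form: cross-entropy on labeled impressions plus a temperature-scaled distillation term that lets unlabeled traffic learn from a teacher's soft predictions. The abstract does not spell out UKDSL's actual losses, so this PyTorch sketch is the textbook version, not the paper's.

    import torch
    import torch.nn.functional as F

    def kd_ssl_loss(student_logits, teacher_logits, labels, labeled_mask,
                    temperature=2.0, distill_weight=0.5):
        # Supervised term on the labeled subset only (assumed non-empty).
        ce = F.cross_entropy(student_logits[labeled_mask], labels[labeled_mask])
        # Distillation term on the whole batch, labeled and unlabeled alike;
        # the t*t factor keeps gradients comparable across temperatures.
        t = temperature
        kd = F.kl_div(F.log_softmax(student_logits / t, dim=-1),
                      F.softmax(teacher_logits / t, dim=-1),
                      reduction="batchmean") * t * t
        return ce + distill_weight * kd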
7. arXiv:2502.06269 [pdf, other] (cs.IR)
   Progressive Collaborative and Semantic Knowledge Fusion for Generative Recommendation
   Authors: Longtao Xiao, Haozhao Wang, Cheng Wang, Linfei Ji, Yifan Wang, Jieming Zhu, Zhenhua Dong, Rui Zhang, Ruixuan Li
   Abstract: With the recent surge in interest surrounding generative paradigms, generative recommendation has increasingly attracted the attention of researchers in the recommendation community. This paradigm generally consists of two stages. In the first stage, pretrained semantic embeddings or collaborative ID embeddings are quantized to create item codes, aiming to capture and preserve rich semantic or collaborative knowledge within these codes. The second stage involves utilizing these discrete codes to perform an autoregressive sequence generation task. Existing methods often either overlook collaborative or semantic knowledge, or combine the two roughly. In this paper, we observe that naively concatenating representations from the semantic and collaborative modalities leads to a semantic domination issue, where the resulting representation is overly influenced by semantic information, effectively overshadowing the collaborative representation. Consequently, downstream recommendation tasks fail to fully exploit the knowledge from both modalities, resulting in suboptimal performance. To address this, we propose a progressive collaborative and semantic knowledge fusion model for generative recommendation, named PRORec, which integrates semantic and collaborative knowledge into a unified code through a two-stage framework. Specifically, in the first stage, we propose a cross-modality knowledge alignment task, which integrates semantic knowledge into collaborative embeddings, enhancing their representational capability. In the second stage, we propose an in-modality knowledge distillation task, designed to effectively capture and integrate knowledge from both semantic and collaborative modalities. Extensive experiments on three widely used benchmarks validate the effectiveness of our approach, demonstrating its superiority compared to existing methods.
   Submitted 10 February, 2025; originally announced February 2025.

8. arXiv:2502.05562 [pdf, other] (cs.DB)
   Can Large Language Models Be Query Optimizer for Relational Databases?
   Authors: Jie Tan, Kangfei Zhao, Rui Li, Jeffrey Xu Yu, Chengzhi Piao, Hong Cheng, Helen Meng, Deli Zhao, Yu Rong
   Abstract: Query optimization, which finds the optimized execution plan for a given query, is a complex planning and decision-making problem within the exponentially growing plan space of database management systems (DBMS). Traditional optimizers rely heavily on cost models constructed from various heuristics and empirical tuning, which can lead to suboptimal plans. Recent developments in Large Language Models (LLMs) have demonstrated their potential for solving complex planning and decision-making problems, such as arithmetic and programmatic tasks. In this paper, we explore the potential of LLMs in handling query optimization and propose a tentative LLM-based query optimizer dubbed LLM-QO, established on PostgreSQL's execution engine. In LLM-QO, we formulate query optimization in an autoregressive fashion which directly generates the execution plan without explicit plan enumeration. To investigate the essential input of LLM-QO, we design a customized data recipe named QInstruct to collect training data from various optimizers and serialize the database's metadata, queries, and corresponding plans into a textual format. Based on QInstruct, we implement a two-stage fine-tuning pipeline, Query Instruction Tuning (QIT) and Query Direct Preference Optimization (QDPO), to empower the capability of general-purpose LLMs in handling query optimization. In our experiments, LLM-QO can generate valid and high-quality plans and consistently outperforms both traditional and learned optimizers on three query workloads. Our findings verify that LLMs can serve as query optimizers, while generalization, efficiency, and adaptivity deserve further research effort.
   Submitted 8 February, 2025; originally announced February 2025.
   Comments: 15 pages
9. arXiv:2502.05416 [pdf, other] (cs.LG)
   Deep Generative Models with Hard Linear Equality Constraints
   Authors: Ruoyan Li, Dipti Ranjan Sahu, Guy Van den Broeck, Zhe Zeng
   Abstract: While deep generative models (DGMs) have demonstrated remarkable success in capturing complex data distributions, they consistently fail to learn constraints that encode domain knowledge, and thus require constraint integration. Existing solutions to this challenge have primarily relied on heuristic methods and often ignore the underlying data distribution, harming generative performance. In this work, we propose a probabilistically sound approach for enforcing hard constraints in DGMs to generate constraint-compliant and realistic data. This is achieved by our proposed gradient estimators that allow the constrained distribution, the data distribution conditioned on constraints, to be differentiably learned. We carry out extensive experiments with various DGM architectures over five image datasets and three scientific applications in which domain knowledge is governed by linear equality constraints. We validate that standard DGMs almost surely generate data violating the constraints. Among all the constraint integration strategies, ours not only guarantees the satisfaction of constraints in generation but also achieves superior generative performance across every benchmark.
   Submitted 12 February, 2025; v1 submitted 7 February, 2025; originally announced February 2025.
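For contrast with the learned approach above: for linear equalities {x : Ax = b}, the simplest heuristic fix is Euclidean projection of a generated sample onto the constraint set. Projection guarantees satisfaction but ignores the data distribution, which is exactly the shortcoming the abstract attributes to heuristic methods. A minimal NumPy sketch:

    import numpy as np

    def project_onto_equality(x, A, b):
        # Euclidean projection onto {x : Ax = b}:
        #   x_proj = x - A^T (A A^T)^{-1} (A x - b)
        residual = A @ x - b
        return x - A.T @ np.linalg.solve(A @ A.T, residual)

    # Toy check: force a random "generated" sample to satisfy x1+x2+x3 = 1.
    A = np.array([[1.0, 1.0, 1.0]])
    b = np.array([1.0])
    x = np.random.default_rng(0).normal(size=3)
    print(A @ project_onto_equality(x, A, b))  # -> [1.]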
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05073v1-abstract-full').style.display = 'none'; document.getElementById('2502.05073v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04387">arXiv:2502.04387</a> <span> [<a href="https://arxiv.org/pdf/2502.04387">pdf</a>, <a href="https://arxiv.org/format/2502.04387">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> FedP$^2$EFT: Federated Learning to Personalize Parameter Efficient Fine-Tuning for Multilingual LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lee%2C+R">Royson Lee</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+M">Minyoung Kim</a>, <a href="/search/cs?searchtype=author&query=Rezk%2C+F">Fady Rezk</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rui Li</a>, <a href="/search/cs?searchtype=author&query=Venieris%2C+S+I">Stylianos I. Venieris</a>, <a href="/search/cs?searchtype=author&query=Hospedales%2C+T">Timothy Hospedales</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04387v1-abstract-short" style="display: inline;"> Federated learning (FL) has enabled the training of multilingual large language models (LLMs) on diverse and decentralized multilingual data, especially on low-resource languages. To improve client-specific performance, personalization via the use of parameter-efficient fine-tuning (PEFT) modules such as LoRA is common. This involves a personalization strategy (PS), such as the design of the PEFT… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04387v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04387v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04387v1-abstract-full" style="display: none;"> Federated learning (FL) has enabled the training of multilingual large language models (LLMs) on diverse and decentralized multilingual data, especially on low-resource languages. To improve client-specific performance, personalization via the use of parameter-efficient fine-tuning (PEFT) modules such as LoRA is common. This involves a personalization strategy (PS), such as the design of the PEFT adapter structures (e.g., in which layers to add LoRAs and what ranks) and choice of hyperparameters (e.g., learning rates) for fine-tuning. Instead of manual PS configuration, we propose FedP$^2$EFT, a federated learning-to-personalize method for multilingual LLMs in cross-device FL settings. 
11. arXiv:2502.04387 [pdf, other] (cs.CL, cs.AI)
    FedP$^2$EFT: Federated Learning to Personalize Parameter Efficient Fine-Tuning for Multilingual LLMs
    Authors: Royson Lee, Minyoung Kim, Fady Rezk, Rui Li, Stylianos I. Venieris, Timothy Hospedales
    Abstract: Federated learning (FL) has enabled the training of multilingual large language models (LLMs) on diverse and decentralized multilingual data, especially on low-resource languages. To improve client-specific performance, personalization via the use of parameter-efficient fine-tuning (PEFT) modules such as LoRA is common. This involves a personalization strategy (PS), such as the design of the PEFT adapter structures (e.g., in which layers to add LoRAs and what ranks) and choice of hyperparameters (e.g., learning rates) for fine-tuning. Instead of manual PS configuration, we propose FedP$^2$EFT, a federated learning-to-personalize method for multilingual LLMs in cross-device FL settings. Unlike most existing PEFT structure selection methods, which are prone to overfitting in low-data regimes, FedP$^2$EFT collaboratively learns the optimal personalized PEFT structure for each client via Bayesian sparse rank selection. Evaluations on both simulated and real-world multilingual FL benchmarks demonstrate that FedP$^2$EFT largely outperforms existing personalized fine-tuning methods, while complementing a range of existing FL methods.
    Submitted 5 February, 2025; originally announced February 2025.
    Comments: Preprint
arXiv:2502.04345 [pdf, other] (https://arxiv.org/abs/2502.04345)
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
Title: JingFang: A Traditional Chinese Medicine Large Language Model of Expert-Level Medical Diagnosis and Syndrome Differentiation-Based Treatment
Authors: Yehan Yan, Tianhao Ma, Ruotai Li, Xinhan Zheng, Guodong Shan, Chisheng Li
Abstract: Traditional Chinese medicine (TCM) plays a vital role in health protection and disease treatment, but its practical application requires extensive medical knowledge and clinical experience. Existing TCM large language models (LLMs) exhibit critical limitations, including incomplete medical consultation and diagnosis and inaccurate syndrome differentiation-based treatment. To address these issues, this study establishes JingFang (JF), a novel TCM large language model that demonstrates expert-level capability in medical diagnosis and syndrome differentiation-based treatment. We innovate a Multi-agent Dynamic Collaborative Chain-of-Thought Mechanism (MDCCTM) for medical consultation, equipping JF with effective and accurate diagnostic ability. In addition, a Syndrome Agent and a Dual-Stage Retrieval Scheme (DSRS) are developed to significantly enhance the capacity of JF for disease treatment based on syndrome differentiation. JingFang not only facilitates the application of LLMs but also promotes the effective practice of TCM in human health protection and disease treatment.
Submitted: 3 February, 2025; originally announced February 2025.
arXiv:2502.04093 [pdf, other] (https://arxiv.org/abs/2502.04093)
Subjects: cs.DC (Distributed, Parallel, and Cluster Computing)
Title: PSZ: Enhancing the SZ Scientific Lossy Compressor With Progressive Data Retrieval
Authors: Zhuoxun Yang, Sheng Di, Longtao Zhang, Ruoyu Li, Ximiao Li, Jiajun Huang, Jinyang Liu, Franck Cappello, Kai Zhao
Abstract: Compression is a crucial solution for data reduction in modern scientific applications due to the exponential growth of data from simulations, experiments, and observations. Compression with progressive retrieval capability allows users to access coarse approximations of data quickly and then incrementally refine these approximations to higher fidelity. Existing progressive compression solutions suffer from low reduction ratios or high operation costs, effectively undermining the approach's benefits. In this paper, we propose the first interpolation-based progressive lossy compression solution that offers both high reduction ratios and low operation costs. The interpolation-based algorithm has been verified as one of the best for scientific data reduction, but no prior effort has made it support progressive retrieval. Our contributions are three-fold: (1) We thoroughly analyze the error characteristics of the interpolation algorithm and propose our solution, IPComp, with multi-level bitplane and predictive coding. (2) We derive optimized strategies toward minimum data retrieval under different fidelity levels indicated by users through error bounds and bitrates. (3) We evaluate the proposed solution using six real-world datasets from four diverse domains. Experimental results demonstrate that our solution achieves up to $487\%$ higher compression ratios and $698\%$ faster speed than other state-of-the-art progressive compressors; it also reduces the data volume for retrieval by up to $83\%$ compared to baselines under the same error bound, and reduces the error by up to $99\%$ under the same bitrate.
Submitted: 7 February, 2025; v1 submitted 6 February, 2025; originally announced February 2025.
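To make the progressive-retrieval idea concrete, here is a minimal bitplane sketch (an illustration only; IPComp's interpolation-based and predictive coding is more involved). Quantized values are split into bitplanes so a reader can stop after any number of leading planes for a coarser reconstruction.

    # Minimal sketch of progressive bitplane retrieval (illustration only).
    import numpy as np

    def encode_bitplanes(q, nbits=8):
        """Split non-negative integer data into bitplanes, most significant first."""
        return [((q >> b) & 1).astype(np.uint8) for b in range(nbits - 1, -1, -1)]

    def decode_bitplanes(planes, nbits=8):
        """Reconstruct using however many leading planes were retrieved."""
        q = np.zeros(planes[0].shape, dtype=np.int64)
        for i, p in enumerate(planes):
            q |= p.astype(np.int64) << (nbits - 1 - i)
        return q

    data = np.random.randint(0, 256, size=(4, 4))
    planes = encode_bitplanes(data)
    coarse = decode_bitplanes(planes[:3])   # progressive: only 3 of 8 planes fetched
    exact = decode_bitplanes(planes)
    assert np.array_equal(exact, data)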
arXiv:2502.03781 [pdf, ps, other] (https://arxiv.org/abs/2502.03781)
Subjects: cs.CV (Computer Vision and Pattern Recognition); eess.IV (Image and Video Processing)
Title: Gaze-Assisted Human-Centric Domain Adaptation for Cardiac Ultrasound Image Segmentation
Authors: Ruiyi Li, Yuting He, Rongjun Ge, Chong Wang, Daoqiang Zhang, Yang Chen, Shuo Li
Abstract: Domain adaptation (DA) for cardiac ultrasound image segmentation is clinically significant and valuable. However, previous domain adaptation methods are prone to being affected by incomplete pseudo-labels and low-quality target-to-source images. Human-centric domain adaptation has the great advantage of human cognitive guidance, which helps the model adapt to the target domain and reduces reliance on labels. Doctors' gaze trajectories contain a large amount of cross-domain human guidance. To leverage gaze information and human cognition for guiding domain adaptation, we propose gaze-assisted human-centric domain adaptation (GAHCDA), which reliably guides the domain adaptation of cardiac ultrasound images. GAHCDA includes the following modules: (1) Gaze Augment Alignment (GAA), which enables the model to obtain general human-cognition features for recognizing segmentation targets in different domains of cardiac ultrasound images, as humans do; and (2) Gaze Balance Loss (GBL), which fuses the gaze heatmap with the model outputs, making the segmentation result structurally closer to the target domain. The experimental results illustrate that our proposed framework segments cardiac ultrasound images in the target domain more effectively than GAN-based and other self-training-based methods, showing great potential in clinical application.
Submitted: 6 February, 2025; originally announced February 2025.
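As a rough illustration of how a gaze heatmap can be fused into a segmentation objective (the paper's exact Gaze Balance Loss formulation may differ), a per-pixel loss can simply be up-weighted where gaze density is high:

    # Hedged sketch of a gaze-weighted segmentation loss; `alpha` and the
    # weighting scheme are illustrative assumptions.
    import torch
    import torch.nn.functional as F

    def gaze_weighted_loss(logits, target, gaze_heatmap, alpha=1.0):
        """Per-pixel BCE, up-weighted where doctors' gaze density is high."""
        bce = F.binary_cross_entropy_with_logits(logits, target, reduction="none")
        weights = 1.0 + alpha * gaze_heatmap          # gaze_heatmap in [0, 1]
        return (weights * bce).mean()

    logits = torch.randn(2, 1, 64, 64)
    target = torch.randint(0, 2, (2, 1, 64, 64)).float()
    gaze = torch.rand(2, 1, 64, 64)
    print(gaze_weighted_loss(logits, target, gaze))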
arXiv:2502.03229 [pdf, other] (https://arxiv.org/abs/2502.03229)
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: A Unified Framework for Semi-Supervised Image Segmentation and Registration
Authors: Ruizhe Li, Grazziela Figueredo, Dorothee Auer, Rob Dineen, Paul Morgan, Xin Chen
Abstract: Semi-supervised learning, which leverages both annotated and unannotated data, is an efficient approach for medical image segmentation, where obtaining annotations for the whole dataset is time-consuming and costly. Traditional semi-supervised methods primarily focus on extracting features and learning data distributions from unannotated data to enhance model training. In this paper, we introduce a novel approach that incorporates an image registration model to generate pseudo-labels for the unannotated data, producing more geometrically correct pseudo-labels that improve model training. Our method was evaluated on a 2D brain dataset, showing excellent performance even when using only 1% of the annotated data. The results show that our approach outperforms conventional semi-supervised segmentation methods (e.g., the teacher-student model), particularly in low-annotation scenarios. GitHub: https://github.com/ruizhe-l/UniSegReg.
Submitted: 5 February, 2025; originally announced February 2025.
Comments: Accepted for publication at IEEE International Symposium on Biomedical Imaging (ISBI) 2025
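The registration-generated pseudo-label idea can be sketched in a few lines: a deformation field from a (hypothetical) registration model warps an annotated label map onto an unannotated image, and the warped map serves as a geometrically plausible pseudo-label. The field below is random noise standing in for a real registration output.

    # Sketch of warping an annotated label map with a displacement field.
    import torch
    import torch.nn.functional as F

    def warp_label(label, flow):
        """Warp a (N,1,H,W) label map with a (N,H,W,2) normalized displacement field."""
        n, _, h, w = label.shape
        ys, xs = torch.meshgrid(torch.linspace(-1, 1, h), torch.linspace(-1, 1, w),
                                indexing="ij")
        base = torch.stack((xs, ys), dim=-1).expand(n, h, w, 2)
        return F.grid_sample(label, base + flow, mode="nearest", align_corners=True)

    label = torch.randint(0, 2, (1, 1, 32, 32)).float()   # annotated source label
    flow = 0.05 * torch.randn(1, 32, 32, 2)               # e.g. from a registration net
    pseudo_label = warp_label(label, flow)                # supervises the unannotated image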
arXiv:2502.02504 [pdf, other] (https://arxiv.org/abs/2502.02504)
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
Title: Unified Spatial-Temporal Edge-Enhanced Graph Networks for Pedestrian Trajectory Prediction
Authors: Ruochen Li, Tanqiu Qiao, Stamos Katsigiannis, Zhanxing Zhu, Hubert P. H. Shum
Abstract: Pedestrian trajectory prediction aims to forecast future movements based on historical paths. Spatial-temporal (ST) methods often separately model spatial interactions among pedestrians and temporal dependencies of individuals. They overlook the direct impacts of interactions among different pedestrians across various time steps (i.e., high-order cross-time interactions), which limits their ability to capture ST inter-dependencies and hinders prediction performance. To address these limitations, we propose UniEdge with three major designs. First, we introduce a unified ST graph data structure that simplifies high-order cross-time interactions into first-order relationships, enabling the learning of ST inter-dependencies in a single step and avoiding the information loss caused by multi-step aggregation. Second, whereas traditional GNNs focus on aggregating pedestrian node features and neglect the propagation of implicit interaction patterns encoded in edge features, we propose the Edge-to-Edge-Node-to-Node Graph Convolution (E2E-N2N-GCN), a novel dual-graph network that jointly models explicit N2N social interactions among pedestrians and implicit E2E influence propagation across these interaction patterns. Finally, to overcome the limited receptive fields and the challenges in capturing long-range dependencies of auto-regressive architectures, we introduce a transformer encoder-based predictor that enables global modeling of temporal correlation. UniEdge outperforms state-of-the-art methods on multiple datasets, including ETH, UCY, and SDD.
Submitted: 4 February, 2025; originally announced February 2025.
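The unified ST graph can be illustrated with a toy adjacency builder: treating each (pedestrian, timestep) pair as a node makes cross-time interactions ordinary first-order edges. This is an assumption-laden illustration (the window size and dense connectivity are arbitrary), not the UniEdge implementation.

    # Toy unified spatial-temporal adjacency: nodes are (pedestrian, timestep)
    # pairs, and edges directly connect pedestrians across nearby time steps.
    import numpy as np

    def unified_st_adjacency(num_peds, num_steps, max_dt=1):
        n = num_peds * num_steps
        adj = np.zeros((n, n), dtype=np.int8)
        idx = lambda p, t: p * num_steps + t
        for t1 in range(num_steps):
            for t2 in range(num_steps):
                if abs(t1 - t2) <= max_dt:               # cross-time window
                    for p1 in range(num_peds):
                        for p2 in range(num_peds):
                            adj[idx(p1, t1), idx(p2, t2)] = 1
        return adj

    print(unified_st_adjacency(num_peds=3, num_steps=4).shape)   # (12, 12)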
arXiv:2502.02096 [pdf, other] (https://arxiv.org/abs/2502.02096)
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: Dual-Flow: Transferable Multi-Target, Instance-Agnostic Attacks via In-the-wild Cascading Flow Optimization
Authors: Yixiao Chen, Shikun Sun, Jianshu Li, Ruoyu Li, Zhe Li, Junliang Xing
Abstract: Adversarial attacks are widely used to evaluate model robustness, and in black-box scenarios, the transferability of these attacks becomes crucial. Existing generator-based attacks have excellent generalization and transferability due to their instance-agnostic nature. However, when training generators for multi-target tasks, the success rate of transfer attacks is relatively low due to the limitations of the model's capacity. To address these challenges, we propose a novel Dual-Flow framework for multi-target instance-agnostic adversarial attacks, utilizing Cascading Distribution Shift Training to develop an adversarial velocity function. Extensive experiments demonstrate that Dual-Flow significantly improves transferability over previous multi-target generative attacks. For example, it increases the success rate from Inception-v3 to ResNet-152 by 34.58%. Furthermore, our attack method shows substantially stronger robustness against defense mechanisms, such as adversarially trained models.
Submitted: 5 February, 2025; v1 submitted 4 February, 2025; originally announced February 2025.
arXiv:2502.02021 [pdf, other] (https://arxiv.org/abs/2502.02021)
Subjects: cs.CV (Computer Vision and Pattern Recognition); eess.IV (Image and Video Processing)
Title: Multi-illuminant Color Constancy via Multi-scale Illuminant Estimation and Fusion
Authors: Hang Luo, Rongwei Li, Jinxing Liang
Abstract: Multi-illuminant color constancy methods aim to eliminate local color casts within an image through pixel-wise illuminant estimation. Existing methods mainly employ deep learning to establish a direct mapping between an image and its illumination map, which neglects the impact of image scales. To alleviate this problem, we represent an illuminant map as the linear combination of components estimated from multi-scale images. Furthermore, we propose a tri-branch convolutional network to estimate multi-grained illuminant distribution maps from multi-scale images. These multi-grained illuminant maps are merged adaptively with an attentional illuminant fusion module. Comprehensive experimental analysis and evaluation demonstrate the effectiveness of our method, which achieves state-of-the-art performance.
Submitted: 4 February, 2025; originally announced February 2025.
Comments: 10 pages, 4 figures; this manuscript is under consideration at Optics Express
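A toy sketch of the estimate-and-fuse pipeline follows; placeholder convolutions stand in for the tri-branch network, and the class and parameter names are hypothetical.

    # Toy multi-scale illuminant estimation with attentional fusion: per-scale
    # maps are upsampled to full resolution and merged with softmax weights.
    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class MultiScaleIlluminant(nn.Module):
        def __init__(self, scales=(1.0, 0.5, 0.25)):
            super().__init__()
            self.scales = scales
            self.branches = nn.ModuleList(nn.Conv2d(3, 3, 3, padding=1) for _ in scales)
            self.attn = nn.Conv2d(3 * len(scales), len(scales), 1)  # per-pixel weights

        def forward(self, img):
            h, w = img.shape[-2:]
            maps = []
            for s, branch in zip(self.scales, self.branches):
                x = img if s == 1.0 else F.interpolate(img, scale_factor=s,
                                                       mode="bilinear", align_corners=False)
                m = branch(x)
                maps.append(F.interpolate(m, size=(h, w), mode="bilinear",
                                          align_corners=False))
            weights = torch.softmax(self.attn(torch.cat(maps, dim=1)), dim=1)
            return sum(weights[:, i:i + 1] * m for i, m in enumerate(maps))

    model = MultiScaleIlluminant()
    illum = model(torch.rand(1, 3, 64, 64))   # (1, 3, 64, 64) pixel-wise illuminant map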
arXiv:2501.18619 [pdf, other] (https://arxiv.org/abs/2501.18619)
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.LG (Machine Learning)
Title: FAAGC: Feature Augmentation on Adaptive Geodesic Curve Based on the shape space theory
Authors: Yuexing Han, Ruijie Li
Abstract: Deep learning models have been widely applied across various domains and industries. However, many fields still face challenges due to limited and insufficient data. This paper proposes a Feature Augmentation on Adaptive Geodesic Curve (FAAGC) method that augments data in the pre-shape space. In the pre-shape space, objects with identical shapes lie on a great circle. Thus, we project deep model representations into the pre-shape space and construct a geodesic curve, i.e., an arc of a great circle, for each class. Feature augmentation is then performed by sampling along these geodesic paths. Extensive experiments demonstrate that FAAGC improves classification accuracy under data-scarce conditions and generalizes well across various feature types.
Submitted: 25 January, 2025; originally announced January 2025.
Comments: 8 pages, 3 figures; submitted to IJCAI 2025
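Since pre-shape projection and geodesic sampling have compact closed forms, a small sketch helps: centering and normalizing a feature vector places it on a unit hypersphere, and points along the great-circle arc between two same-class features are given by spherical linear interpolation. The adaptive curve fitting of the actual method is not shown.

    # Sketch of pre-shape projection and great-circle (geodesic) sampling.
    import numpy as np

    def to_preshape(v):
        v = v - v.mean()                 # remove location
        return v / np.linalg.norm(v)     # remove scale -> point on unit sphere

    def geodesic_sample(u, v, t):
        """Point at fraction t along the great-circle arc from u to v (slerp)."""
        omega = np.arccos(np.clip(np.dot(u, v), -1.0, 1.0))
        return (np.sin((1 - t) * omega) * u + np.sin(t * omega) * v) / np.sin(omega)

    a, b = to_preshape(np.random.randn(128)), to_preshape(np.random.randn(128))
    augmented = [geodesic_sample(a, b, t) for t in np.linspace(0.1, 0.9, 5)]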
arXiv:2501.18367 [pdf, other] (https://arxiv.org/abs/2501.18367)
Subjects: cs.LG (Machine Learning); cs.AI (Artificial Intelligence)
Title: A Learnable Multi-views Contrastive Framework with Reconstruction Discrepancy for Medical Time-Series
Authors: Yifan Wang, Hongfeng Ai, Ruiqi Li, Maowei Jiang, Cheng Jiang, Chenzhong Li
Abstract: In medical time-series disease diagnosis, two key challenges are identified. First, the high annotation cost of medical data leads to overfitting in models trained on label-limited, single-center datasets. To address this, we propose incorporating external data from related tasks and leveraging AE-GAN to extract prior knowledge, providing valuable references for downstream tasks. Second, many existing studies employ contrastive learning to derive more generalized medical sequence representations for diagnostic tasks, usually relying on manually designed diverse positive and negative sample pairs. However, these approaches are complex, lack generalizability, and fail to adaptively capture disease-specific features across different conditions. To overcome this, we introduce LMCF (Learnable Multi-views Contrastive Framework), a framework that integrates a multi-head attention mechanism and adaptively learns representations from different views through inter-view and intra-view contrastive learning strategies. Additionally, the pre-trained AE-GAN is used to reconstruct discrepancies in the target data as disease probabilities, which are then integrated into the contrastive learning process. Experiments on three target datasets demonstrate that our method consistently outperforms seven other baselines, highlighting its significant impact on healthcare applications such as the diagnosis of myocardial infarction, Alzheimer's disease, and Parkinson's disease.
Submitted: 30 January, 2025; originally announced January 2025.
Comments: 15 pages, 6 figures
ACM Class: I.2.6; K.3.6
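As a generic illustration of the inter-view contrastive component (LMCF's learnable multi-view attention and reconstruction-discrepancy weighting are omitted), a standard InfoNCE loss between two views of the same batch looks like this:

    # Generic InfoNCE between two views; matched rows are positives.
    import torch
    import torch.nn.functional as F

    def info_nce(view1, view2, temperature=0.1):
        z1, z2 = F.normalize(view1, dim=1), F.normalize(view2, dim=1)
        logits = z1 @ z2.T / temperature        # (N, N) similarity matrix
        labels = torch.arange(z1.size(0))       # positives on the diagonal
        return F.cross_entropy(logits, labels)

    v1, v2 = torch.randn(8, 64), torch.randn(8, 64)
    print(info_nce(v1, v2))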
arXiv:2501.16254 [pdf, other] (https://arxiv.org/abs/2501.16254)
Subjects: cs.LG (Machine Learning)
Title: Multi-Agent Geospatial Copilots for Remote Sensing Workflows
Authors: Chaehong Lee, Varatheepan Paramanayakam, Andreas Karatzas, Yanan Jian, Michael Fore, Heming Liao, Fuxun Yu, Ruopu Li, Iraklis Anagnostopoulos, Dimitrios Stamoulis
Abstract: We present GeoLLM-Squad, a geospatial Copilot that introduces the novel multi-agent paradigm to remote sensing (RS) workflows. Unlike existing single-agent approaches that rely on monolithic large language models (LLMs), GeoLLM-Squad separates agentic orchestration from geospatial task-solving by delegating RS tasks to specialized sub-agents. Built on the open-source AutoGen and GeoLLM-Engine frameworks, our work enables the modular integration of diverse applications spanning urban monitoring, forestry protection, climate analysis, and agriculture studies. Our results demonstrate that while single-agent systems struggle to scale with increasing RS task complexity, GeoLLM-Squad maintains robust performance, achieving a 17% improvement in agentic correctness over state-of-the-art baselines. Our findings highlight the potential of multi-agent AI in advancing RS workflows.
Submitted: 27 January, 2025; originally announced January 2025.
arXiv:2501.16029 [pdf, other] (https://arxiv.org/abs/2501.16029)
Subjects: cs.CR (Cryptography and Security); cs.AI (Artificial Intelligence)
Title: FDLLM: A Text Fingerprint Detection Method for LLMs in Multi-Language, Multi-Domain Black-Box Environments
Authors: Zhiyuan Fu, Junfan Chen, Hongyu Sun, Ting Yang, Ruidong Li, Yuqing Zhang
Abstract: Using large language model (LLM) integration platforms without transparency about which LLM is being invoked can lead to potential security risks. Specifically, attackers may exploit this black-box scenario to deploy malicious models and embed viruses in the code provided to users. In this context, it is increasingly urgent for users to clearly identify the LLM they are interacting with, in order to avoid unknowingly becoming victims of malicious models. However, existing studies primarily focus on mixed classification of human- and machine-generated text, with limited attention to classifying texts generated solely by different models. Current research also faces dual bottlenecks: the poor quality of LLM-generated text (LLMGT) datasets and the limited coverage of detectable LLMs, resulting in poor detection performance for various LLMGT in black-box scenarios. To address these challenges, we propose the first LLMGT fingerprint detection model, FDLLM, based on Qwen2.5-7B and fine-tuned using LoRA. FDLLM can more efficiently handle detection tasks across multilingual and multi-domain scenarios. Furthermore, we constructed a dataset named FD-Datasets, consisting of 90,000 samples that span multiple languages and domains, covering 20 different LLMs. Experimental results demonstrate that FDLLM achieves a macro F1 score 16.7% higher than the best baseline method, LM-D.
Submitted: 27 January, 2025; originally announced January 2025.
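The described setup, a LoRA-fine-tuned Qwen2.5-7B classifier over 20 source LLMs, can be sketched with the `peft` library; the target modules and hyperparameters below are placeholders rather than the paper's configuration, and training and tokenization details are omitted.

    # Hedged sketch: 20-way sequence classifier adapted with LoRA via `peft`.
    from transformers import AutoModelForSequenceClassification, AutoTokenizer
    from peft import LoraConfig, get_peft_model

    base = "Qwen/Qwen2.5-7B"                       # as named in the abstract
    tokenizer = AutoTokenizer.from_pretrained(base)
    model = AutoModelForSequenceClassification.from_pretrained(base, num_labels=20)

    lora = LoraConfig(r=8, lora_alpha=16, target_modules=["q_proj", "v_proj"],
                      task_type="SEQ_CLS")          # adapt only attention projections
    model = get_peft_model(model, lora)
    model.print_trainable_parameters()              # LoRA trains a small fraction
    # ...then train on FD-Datasets-style (text, source-LLM label) pairs as usual.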
arXiv:2501.16002 [pdf, other] (https://arxiv.org/abs/2501.16002)
Subjects: cs.LG (Machine Learning)
Title: ScaDyG: A New Paradigm for Large-scale Dynamic Graph Learning
Authors: Xiang Wu, Xunkai Li, Rong-Hua Li, Kangfei Zhao, Guoren Wang
Abstract: Dynamic graphs (DGs), which capture time-evolving relationships between graph entities, have widespread real-world applications. To efficiently encode DGs for downstream tasks, most dynamic graph neural networks follow the traditional message-passing mechanism and extend it with time-based techniques. Despite their effectiveness, the growth of historical interactions introduces significant scalability issues, particularly in industry scenarios. To address this limitation, we propose ScaDyG, whose core idea is a time-aware scalable learning paradigm: 1) Time-aware Topology Reformulation: ScaDyG first segments historical interactions into time steps (intra and inter) based on dynamic modeling, enabling weight-free and time-aware graph propagation within pre-processing. 2) Dynamic Temporal Encoding: to further achieve fine-grained graph propagation within time steps, ScaDyG integrates temporal encoding through a combination of exponential functions in a scalable manner. 3) Hypernetwork-driven Message Aggregation: after obtaining the propagated features (i.e., messages), ScaDyG utilizes a hypernetwork to analyze historical dependencies, implementing node-wise representations via adaptive temporal fusion. Extensive experiments on 12 datasets demonstrate that ScaDyG performs comparably to or even outperforms other SOTA methods on both node- and link-level downstream tasks, with fewer learnable parameters and higher efficiency.
Submitted: 30 January, 2025; v1 submitted 27 January, 2025; originally announced January 2025.
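The exponential temporal encoding can be illustrated with a toy recency-weighting scheme; the decay rates and the aggregation below are illustrative assumptions, not the paper's parameterization.

    # Toy temporal encoding: a bank of exponential decays maps a time gap to a
    # feature vector, here collapsed into a recency weight per message.
    import numpy as np

    def exp_temporal_encoding(dt, rates=(0.1, 0.5, 1.0, 2.0)):
        """Map a time gap dt >= 0 to a vector of exponential decays."""
        return np.array([np.exp(-r * dt) for r in rates])

    messages = np.random.randn(5, 8)                 # 5 historical interactions
    gaps = np.array([0.5, 1.0, 3.0, 7.0, 10.0])      # time since each interaction
    weights = np.stack([exp_temporal_encoding(g).mean() for g in gaps])
    node_repr = (weights[:, None] * messages).sum(axis=0) / weights.sum()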
arXiv:2501.15465 [pdf, ps, other] (https://arxiv.org/abs/2501.15465)
Subjects: quant-ph (Quantum Physics); cs.IT (Information Theory)
Title: Geometry of symplectic group and optimal EAQECC codes
Authors: Ruihu Li, Yuezhen Ren, Chaofeng Guan, Yang Liu
Abstract: A new type of link between the geometry of the symplectic group and entanglement-assisted (EA) quantum error-correcting codes (EAQECCs) is presented. Relations of symplectic subspaces and quaternary additive codes concerning the parameters of EAQECCs are described. Thus, the parameters of EA stabilizer codes are revealed in the nomenclature of additive codes. Our techniques enable us to solve some open problems about optimal EAQECCs and entanglement-assisted quantum minimum distance separable (EAQMDS) codes, and are also useful for designing encoding and decoding quantum circuits of EA stabilizer codes.
Submitted: 26 January, 2025; originally announced January 2025.
Comments: 5 pages
arXiv:2501.14103 [pdf, other] (https://arxiv.org/abs/2501.14103)
Subjects: cs.LG (Machine Learning)
Title: Personalized Interpolation: An Efficient Method to Tame Flexible Optimization Window Estimation
Authors: Xin Zhang, Weiliang Li, Rui Li, Zihang Fu, Tongyi Tang, Zhengyu Zhang, Wen-Yen Chen, Nima Noorshams, Nirav Jasapara, Xiaowen Ding, Ellie Wen, Xue Feng
Abstract: In the realm of online advertising, optimizing conversions is crucial for delivering relevant products to users and enhancing business outcomes. Predicting conversion events is challenging due to variable delays between user interactions, such as impressions or clicks, and the actual conversions. These delays differ significantly across various advertisers and products, necessitating distinct optimization time windows for targeted conversions. To address this, we introduce a novel approach named the Personalized Interpolation method, which builds upon existing fixed-conversion-window models to estimate flexible conversion windows. This method allows for the accurate estimation of conversions across a variety of delay ranges, thus meeting the diverse needs of advertisers without increasing system complexity. To validate the efficacy of our proposed method, we conducted comprehensive experiments using an ads conversion model. Our experiments demonstrate that this method not only achieves high prediction accuracy but also does so more efficiently than other existing solutions. This validation underscores the potential of Personalized Interpolation to significantly enhance conversion optimization in real-world online advertising systems, promising improved targeting and effectiveness in advertising strategies.
Submitted: 23 January, 2025; originally announced January 2025.
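A minimal sketch of the interpolation idea, assuming a simple linear scheme between the two nearest fixed windows (the window choices and weighting are illustrative assumptions, not the paper's method):

    # Estimate a flexible-window conversion rate from fixed-window model outputs.
    def personalized_interpolation(target_window, fixed_preds):
        """fixed_preds: dict mapping fixed window (days) -> predicted conversion rate.
        Assumes min(windows) <= target_window <= max(windows)."""
        windows = sorted(fixed_preds)
        lo = max(w for w in windows if w <= target_window)
        hi = min(w for w in windows if w >= target_window)
        if lo == hi:
            return fixed_preds[lo]
        t = (target_window - lo) / (hi - lo)
        return (1 - t) * fixed_preds[lo] + t * fixed_preds[hi]

    preds = {1: 0.020, 7: 0.035, 28: 0.050}          # fixed-window model outputs
    print(personalized_interpolation(10, preds))      # flexible 10-day estimate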
arXiv:2501.13919 [pdf, other] (https://arxiv.org/abs/2501.13919)
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence); cs.CL (Computation and Language); cs.LG (Machine Learning); cs.RO (Robotics)
Title: Temporal Preference Optimization for Long-Form Video Understanding
Authors: Rui Li, Xiaohan Wang, Yuhui Zhang, Zeyu Wang, Serena Yeung-Levy
Abstract: Despite significant advancements in video large multimodal models (video-LMMs), achieving effective temporal grounding in long-form videos remains a challenge for existing models. To address this limitation, we propose Temporal Preference Optimization (TPO), a novel post-training framework designed to enhance the temporal grounding capabilities of video-LMMs through preference learning. TPO adopts a self-training approach that enables models to differentiate between well-grounded and less accurate temporal responses by leveraging curated preference datasets at two granularities: localized temporal grounding, which focuses on specific video segments, and comprehensive temporal grounding, which captures extended temporal dependencies across entire video sequences. By optimizing on these preference datasets, TPO significantly enhances temporal understanding while reducing reliance on manually annotated data. Extensive experiments on three long-form video understanding benchmarks (LongVideoBench, MLVU, and Video-MME) demonstrate the effectiveness of TPO across two state-of-the-art video-LMMs. Notably, LLaVA-Video-TPO establishes itself as the leading 7B model on the Video-MME benchmark, underscoring the potential of TPO as a scalable and efficient solution for advancing temporal reasoning in long-form video understanding. Project page: https://ruili33.github.io/tpo_website.
Submitted: 30 January, 2025; v1 submitted 23 January, 2025; originally announced January 2025.
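Preference learning of this kind typically optimizes a DPO-style objective; the sketch below shows such a loss over preferred (well-grounded) versus rejected temporal responses, while TPO's curated two-granularity data pipeline is not shown and the numbers are dummies.

    # DPO-style preference loss over summed token log-likelihoods under the
    # trained policy and a frozen reference model.
    import torch
    import torch.nn.functional as F

    def preference_loss(logp_pref, logp_rej, ref_logp_pref, ref_logp_rej, beta=0.1):
        margin = beta * ((logp_pref - ref_logp_pref) - (logp_rej - ref_logp_rej))
        return -F.logsigmoid(margin).mean()

    lp, lr = torch.tensor([-12.3]), torch.tensor([-15.9])     # policy log-probs
    rlp, rlr = torch.tensor([-13.0]), torch.tensor([-14.8])   # reference log-probs
    print(preference_loss(lp, lr, rlp, rlr))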
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.13746">arXiv:2501.13746</a> <span> [<a href="https://arxiv.org/pdf/2501.13746">pdf</a>, <a href="https://arxiv.org/format/2501.13746">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> EICopilot: Search and Explore Enterprise Information over Large-scale Knowledge Graphs with LLM-driven Agents </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Yun%2C+Y">Yuhui Yun</a>, <a href="/search/cs?searchtype=author&query=Ye%2C+H">Huilong Ye</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinru Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ruojia Li</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+J">Jingfeng Deng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Li Li</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+H">Haoyi Xiong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13746v1-abstract-short" style="display: inline;"> The paper introduces EICopilot, an novel agent-based solution enhancing search and exploration of enterprise registration data within extensive online knowledge graphs like those detailing legal entities, registered capital, and major shareholders. Traditional methods necessitate text-based queries and manual subgraph explorations, often resulting in time-consuming processes. EICopilot, deployed a… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13746v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13746v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13746v1-abstract-full" style="display: none;"> The paper introduces EICopilot, an novel agent-based solution enhancing search and exploration of enterprise registration data within extensive online knowledge graphs like those detailing legal entities, registered capital, and major shareholders. Traditional methods necessitate text-based queries and manual subgraph explorations, often resulting in time-consuming processes. EICopilot, deployed as a chatbot via Baidu Enterprise Search, improves this landscape by utilizing Large Language Models (LLMs) to interpret natural language queries. This solution automatically generates and executes Gremlin scripts, providing efficient summaries of complex enterprise relationships. Distinct feature a data pre-processing pipeline that compiles and annotates representative queries into a vector database of examples for In-context learning (ICL), a comprehensive reasoning pipeline combining Chain-of-Thought with ICL to enhance Gremlin script generation for knowledge graph search and exploration, and a novel query masking strategy that improves intent recognition for heightened script accuracy. 
arXiv:2501.12637 (https://arxiv.org/abs/2501.12637) [cs.CV]
DWTNeRF: Boosting Few-shot Neural Radiance Fields via Discrete Wavelet Transform
Authors: Hung Nguyen, Blark Runfa Li, Truong Nguyen
Abstract: Neural Radiance Fields (NeRF) has achieved superior performance in novel view synthesis and 3D scene representation, but its practical applications are hindered by slow convergence and reliance on dense training views. To this end, we present DWTNeRF, a unified framework based on Instant-NGP's fast-training hash encoding, coupled with regularization terms designed for few-shot NeRF, which operates on sparse training views. DWTNeRF additionally includes a novel Discrete Wavelet loss that allows explicit prioritization of low frequencies directly in the training objective, reducing few-shot NeRF's overfitting on high frequencies in earlier training stages. We also introduce a model-based approach, based on multi-head attention, that is compatible with INGP-based models, which are sensitive to architectural changes. On the 3-shot LLFF benchmark, DWTNeRF outperforms Vanilla INGP by 15.07% in PSNR, 24.45% in SSIM and 36.30% in LPIPS. Our approach encourages a re-thinking of current few-shot approaches for fast-converging implicit representations like INGP or 3DGS.
Submitted 2 February, 2025; v1 submitted 21 January, 2025; originally announced January 2025.
Comments: 17 pages, 13 figures, 8 tables
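The idea of prioritizing low frequencies in the objective can be illustrated with a one-level Haar decomposition, whose LL (approximation) band is, up to scaling, a 2x2 average pooling. This is an illustrative stand-in, not the paper's exact loss.

```python
import torch.nn.functional as F

def haar_ll(img):
    # LL band of a one-level Haar DWT == 2x2 average pooling (up to scale).
    return F.avg_pool2d(img, kernel_size=2)

def low_frequency_loss(rendered, target):
    # Penalize low-frequency mismatch explicitly, so early training cannot
    # overfit high-frequency detail from the sparse views.
    return F.l1_loss(haar_ll(rendered), haar_ll(target))
```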
arXiv:2501.12040 (https://arxiv.org/abs/2501.12040) [cs.CE]
Select2Drive: Pragmatic Communications for Real-Time Collaborative Autonomous Driving
Authors: Jiahao Huang, Jianhang Zhu, Rongpeng Li, Zhifeng Zhao, Honggang Zhang
Abstract: Vehicle-to-Everything communications-assisted Autonomous Driving (V2X-AD) has witnessed remarkable advancements in recent years, with pragmatic communications (PragComm) emerging as a promising paradigm for real-time collaboration among vehicles and other agents. Simultaneously, extensive research has explored the interplay between collaborative perception and decision-making in end-to-end driving frameworks. In this work, we revisit the collaborative driving problem and propose the Select2Drive framework to optimize the utilization of limited computational and communication resources. In particular, to mitigate cumulative latency in perception and decision-making, Select2Drive introduces Distributed Predictive Perception (DPP), which formulates an active prediction paradigm and simplifies high-dimensional semantic feature prediction into computationally efficient, motion-aware reconstruction. Given the "less is more" principle that a broadened perceptual horizon can confuse the decision module rather than contribute to it, Select2Drive utilizes Area-of-Importance-based PragComm (APC) to prioritize the communication of critical regions, thus boosting both communication efficiency and decision-making efficacy. Empirical evaluations on the V2Xverse dataset and the CARLA driving simulator demonstrate that Select2Drive achieves an 11.31% (resp. 7.69%) improvement in offline perception tasks under limited bandwidth (resp. pose error conditions). Moreover, it delivers up to 14.68% and 31.76% enhancements in closed-loop driving scores and route completion rates, particularly in scenarios characterized by dense traffic and high-speed dynamics.
Submitted 21 January, 2025; originally announced January 2025.
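APC's prioritization can be pictured as a budgeted top-k selection over an importance map. This is a sketch under assumed inputs; in the paper the importance estimation is learned, not given.

```python
import numpy as np

def select_critical_regions(importance, budget):
    """Keep only the `budget` most decision-relevant regions for transmission."""
    flat = importance.ravel()
    keep = np.argsort(flat)[::-1][:budget]     # indices of the top-k regions
    mask = np.zeros(flat.shape, dtype=bool)
    mask[keep] = True
    return mask.reshape(importance.shape)      # True = transmit this region
```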
arXiv:2501.11960 (https://arxiv.org/abs/2501.11960) [cs.CL, cs.AI]
TAD-Bench: A Comprehensive Benchmark for Embedding-Based Text Anomaly Detection
Authors: Yang Cao, Sikun Yang, Chen Li, Haolong Xiang, Lianyong Qi, Bo Liu, Rongsheng Li, Ming Liu
Abstract: Text anomaly detection is crucial for identifying spam, misinformation, and offensive language in natural language processing tasks. Despite the growing adoption of embedding-based methods, their effectiveness and generalizability across diverse application scenarios remain under-explored. To address this, we present TAD-Bench, a comprehensive benchmark designed to systematically evaluate embedding-based approaches for text anomaly detection. TAD-Bench integrates multiple datasets spanning different domains, combining state-of-the-art embeddings from large language models with a variety of anomaly detection algorithms. Through extensive experiments, we analyze the interplay between embeddings and detection methods, uncovering their strengths, weaknesses, and applicability to different tasks. These findings offer new perspectives on building more robust, efficient, and generalizable anomaly detection systems for real-world applications.
Submitted 21 January, 2025; originally announced January 2025.
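A minimal instance of the embedding-plus-detector pairing this benchmark evaluates might look like the following; the embedding model and the detector here are arbitrary choices for illustration, not the benchmark's fixed configuration.

```python
from sentence_transformers import SentenceTransformer
from sklearn.ensemble import IsolationForest

texts = ["meeting moved to 3 pm", "lunch tomorrow?",
         "FREE crypto!!! click now to claim your prize"]
embeddings = SentenceTransformer("all-MiniLM-L6-v2").encode(texts)
detector = IsolationForest(random_state=0).fit(embeddings)
scores = detector.score_samples(embeddings)  # lower score = more anomalous
```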
arXiv:2501.11858 (https://arxiv.org/abs/2501.11858) [cs.CV, cs.CL]
EmbodiedEval: Evaluate Multimodal LLMs as Embodied Agents
Authors: Zhili Cheng, Yuge Tu, Ran Li, Shiqi Dai, Jinyi Hu, Shengding Hu, Jiahao Li, Yang Shi, Tianyu Yu, Weize Chen, Lei Shi, Maosong Sun
Abstract: Multimodal Large Language Models (MLLMs) have shown significant advancements, providing a promising future for embodied agents. Existing benchmarks for evaluating MLLMs primarily utilize static images or videos, limiting assessments to non-interactive scenarios. Meanwhile, existing embodied AI benchmarks are task-specific and insufficiently diverse, so they do not adequately evaluate the embodied capabilities of MLLMs. To address this, we propose EmbodiedEval, a comprehensive and interactive evaluation benchmark for MLLMs with embodied tasks. EmbodiedEval features 328 distinct tasks within 125 varied 3D scenes, each rigorously selected and annotated. It covers a broad spectrum of existing embodied AI tasks with significantly enhanced diversity, all within a unified simulation and evaluation framework tailored for MLLMs. The tasks are organized into five categories (navigation, object interaction, social interaction, attribute question answering, and spatial question answering) to assess different capabilities of the agents. We evaluated state-of-the-art MLLMs on EmbodiedEval and found that they fall significantly short of human level on embodied tasks. Our analysis demonstrates the limitations of existing MLLMs in embodied capabilities, providing insights for their future development. We open-source all evaluation data and the simulation framework at https://github.com/thunlp/EmbodiedEval.
Submitted 20 January, 2025; originally announced January 2025.
arXiv:2501.11823 (https://arxiv.org/abs/2501.11823) [cs.LG, cs.AI, cs.DB, cs.SI]
Toward Scalable Graph Unlearning: A Node Influence Maximization based Approach
Authors: Xunkai Li, Bowen Fan, Zhengyu Wu, Zhiyu Li, Rong-Hua Li, Guoren Wang
Abstract: Machine unlearning, as a pivotal technology for enhancing model robustness and data privacy, has garnered significant attention in prevalent web mining applications, especially in thriving graph-based scenarios. However, most existing graph unlearning (GU) approaches face significant challenges due to the intricate interactions among web-scale graph elements during model training: (1) gradient-driven node entanglement hinders complete knowledge removal in response to unlearning requests, and (2) billion-level graph elements in web scenarios present inevitable scalability issues. To break these limitations, we open up a new perspective by drawing a connection between GU and conventional social influence maximization. To this end, we propose Node Influence Maximization (NIM), built on a decoupled influence propagation model and a fine-grained influence function in a scalable manner, crafted as a plug-and-play strategy to identify potential nodes affected by unlearning entities. This approach runs offline, independent of GU, allowing it to be seamlessly integrated into most GU methods to improve their unlearning performance. Based on this, we introduce Scalable Graph Unlearning (SGU) as a new fine-tuned framework, which balances the forgetting and reasoning capability of the unlearned model through entity-specific optimizations. Extensive experiments on 14 datasets, including the large-scale ogbn-papers100M, demonstrate the effectiveness of our approach. Specifically, NIM enhances the forgetting capability of most GU methods, while SGU achieves comprehensive SOTA performance and maintains scalability.
Submitted 20 January, 2025; originally announced January 2025.
Comments: Under Review
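The decoupled-propagation idea (spread influence from the nodes being unlearned through the graph, offline and model-free) can be sketched as a few rounds of damped random-walk diffusion; the paper's influence function is more fine-grained than this.

```python
import numpy as np

def influence_scores(adj, unlearn_nodes, hops=3, damping=0.5):
    """Propagate unit mass from unlearned nodes through a row-normalized
    adjacency matrix; high-scoring nodes are candidates for correction."""
    P = adj / np.maximum(adj.sum(axis=1, keepdims=True), 1e-12)
    x = np.zeros(adj.shape[0])
    x[list(unlearn_nodes)] = 1.0
    scores = np.zeros_like(x)
    for k in range(1, hops + 1):
        x = x @ P                      # one hop of influence propagation
        scores += damping ** k * x     # damp longer paths
    return scores
```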
arXiv:2501.11817 (https://arxiv.org/abs/2501.11817) [cs.LG, cs.AI, cs.DB, cs.SI]
Toward Effective Digraph Representation Learning: A Magnetic Adaptive Propagation based Approach
Authors: Xunkai Li, Daohan Su, Zhengyu Wu, Guang Zeng, Hongchao Qin, Rong-Hua Li, Guoren Wang
Abstract: The $q$-parameterized magnetic Laplacian serves as the foundation of directed graph (digraph) convolution, enabling this kind of digraph neural network (MagDG) to encode node features and structural insights by complex-domain message passing. As a generalization of undirected methods, MagDG shows superior capability in modeling intricate web-scale topology. Despite the great success achieved by existing MagDGs, limitations remain: (1) Hand-crafted $q$: the performance of MagDGs depends on selecting an appropriate $q$-parameter to construct suitable graph propagation equations in the complex domain. This parameter tuning, driven by downstream tasks, limits model flexibility and significantly increases manual effort. (2) Coarse message passing: most approaches treat all nodes with the same complex-domain propagation and aggregation rules, neglecting their unique digraph contexts. This oversight results in sub-optimal performance. To address these issues, we propose two key techniques: (1) MAP, a plug-and-play complex-domain propagation optimization strategy for digraph learning, which integrates seamlessly into any MagDG to improve predictions while retaining high running efficiency; and (2) MAP++, a new digraph learning framework that further incorporates a learnable mechanism to achieve adaptive edge-wise propagation and node-wise aggregation in the complex domain for better performance. Extensive experiments on 12 datasets demonstrate that MAP offers flexibility, since it can be incorporated into any MagDG, and scalability, since it can handle web-scale digraphs. MAP++ achieves SOTA predictive performance on 4 different downstream tasks.
Submitted 20 January, 2025; originally announced January 2025.
Comments: Accepted by WWW 2025
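The $q$-parameterized magnetic Laplacian the abstract refers to has a standard construction: symmetrize the edge weights and push edge direction into a complex phase. A small NumPy version of that standard definition (not MAP's adaptive variant):

```python
import numpy as np

def magnetic_laplacian(A, q):
    """L_q = D_s - H_q with H_q = A_s * exp(i * 2*pi*q * (A - A^T)),
    where A_s is the symmetrized adjacency and D_s its degree matrix."""
    A_s = 0.5 * (A + A.T)
    H_q = A_s * np.exp(2j * np.pi * q * (A - A.T))  # Hermitian by construction
    D_s = np.diag(A_s.sum(axis=1))
    return D_s - H_q
```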
arXiv:2501.11292 (https://arxiv.org/abs/2501.11292) [cs.CL]
Advancing Multi-Party Dialogue Systems with Speaker-ware Contrastive Learning
Authors: Zhongtian Hu, Qi He, Ronghan Li, Meng Zhao, Lifang Wang
Abstract: Dialogue response generation has made significant progress, but most research has focused on dyadic dialogue. In contrast, multi-party dialogues involve more participants, each potentially discussing different topics, making the task more complex. Current methods often rely on graph neural networks to model dialogue context, which helps capture the structural dynamics of multi-party conversations. However, these methods are heavily dependent on intricate graph structures and dataset annotations, and they often overlook the distinct speaking styles of participants. To address these challenges, we propose CMR, a Contrastive learning-based Multi-party dialogue Response generation model. CMR uses self-supervised contrastive learning to better distinguish "who says what." Additionally, by comparing speakers within the same conversation, the model captures differences in speaking styles and thematic transitions. To the best of our knowledge, this is the first approach to apply contrastive learning in multi-party dialogue generation. Experimental results show that CMR significantly outperforms state-of-the-art models in multi-party dialogue response tasks.
Submitted 20 January, 2025; originally announced January 2025.
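The abstract does not spell out CMR's training objective, but "distinguishing who says what" with self-supervised contrastive learning typically reduces to an InfoNCE-style loss over utterance embeddings. A generic sketch, with the embeddings assumed given:

```python
import torch
import torch.nn.functional as F

def speaker_infonce(anchor, positive, negatives, tau=0.07):
    """anchor/positive: (B, d) embeddings of utterances by the same speaker;
    negatives: (K, d) embeddings of utterances by other speakers."""
    a = F.normalize(anchor, dim=-1)
    p = F.normalize(positive, dim=-1)
    n = F.normalize(negatives, dim=-1)
    pos = (a * p).sum(dim=-1, keepdim=True) / tau   # (B, 1) same-speaker sims
    neg = a @ n.T / tau                             # (B, K) cross-speaker sims
    logits = torch.cat([pos, neg], dim=1)
    target = torch.zeros(a.size(0), dtype=torch.long, device=a.device)
    return F.cross_entropy(logits, target)          # pull same speaker closer
```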
arXiv:2501.11288 (https://arxiv.org/abs/2501.11288) [cs.CV]
PD-SORT: Occlusion-Robust Multi-Object Tracking Using Pseudo-Depth Cues
Authors: Yanchao Wang, Dawei Zhang, Run Li, Zhonglong Zheng, Minglu Li
Abstract: Multi-object tracking (MOT) is a rising topic in video processing technologies and has important application value in consumer electronics. Currently, tracking-by-detection (TBD) is the dominant paradigm for MOT, performing target detection and association frame by frame. However, the association performance of TBD methods degrades in complex scenes with heavy occlusions, which hinders the application of such methods in real-world scenarios. To this end, we incorporate pseudo-depth cues to enhance association performance and propose Pseudo-Depth SORT (PD-SORT). First, we extend the Kalman filter state vector with pseudo-depth states. Second, we introduce a novel depth volume IoU (DVIoU) that combines the conventional 2D IoU with pseudo-depth. Furthermore, we develop a quantized pseudo-depth measurement (QPDM) strategy for more robust data association, and we integrate camera motion compensation (CMC) to handle dynamic camera situations. With these designs, PD-SORT significantly alleviates occlusion-induced ambiguous associations and achieves leading performance on DanceTrack, MOT17, and MOT20. The improvement is especially pronounced on DanceTrack, where objects show complex motions, similar appearances, and frequent occlusions. The code is available at https://github.com/Wangyc2000/PD_SORT.
Submitted 20 January, 2025; originally announced January 2025.
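One plausible reading of "combining 2D IoU with pseudo-depth" is to discount box overlap by depth disagreement. The sketch below illustrates that reading; the paper's exact DVIoU formula may differ.

```python
def dv_iou(box_a, box_b, depth_a, depth_b, depth_scale=1.0):
    """Boxes are (x1, y1, x2, y2); depths are scalar pseudo-depths."""
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    iou = inter / (area_a + area_b - inter + 1e-9)
    depth_affinity = max(0.0, 1.0 - abs(depth_a - depth_b) / depth_scale)
    return iou * depth_affinity  # high only if boxes overlap AND depths agree
```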
arXiv:2501.11269 (https://arxiv.org/abs/2501.11269) [cs.CL]
Can xLLMs Understand the Structure of Dialog? Exploring Multilingual Response Generation in Complex Scenarios
Authors: Zhongtian Hu, Yiwen Cui, Ronghan Li, Meng Zhao, Lifang Wang
Abstract: Multilingual research has garnered increasing attention, especially in the domain of dialogue systems. The rapid advancements in large language models (LLMs) have fueled the demand for high-performing multilingual models. However, two major challenges persist: the scarcity of high-quality multilingual datasets and the limited complexity of existing datasets in capturing realistic dialogue scenarios. To address these gaps, we introduce XMP, a high-quality parallel Multilingual dataset sourced from Multi-party Podcast dialogues. Each sample in the dataset features at least three participants discussing a wide range of topics, including society, culture, politics, and entertainment. Through extensive experiments, we uncover significant limitations in the previously recognized multilingual capabilities of LLMs when applied to such complex dialogue scenarios. For instance, the widely accepted multilingual complementary ability of LLMs is notably impacted. By conducting further experiments, we explore the mechanisms of LLMs in multilingual environments from multiple perspectives, shedding new light on their performance in real-world, diverse conversational contexts.
Submitted 19 January, 2025; originally announced January 2025.
arXiv:2501.11041 (https://arxiv.org/abs/2501.11041) [cs.CL]
Enhancing Semantic Consistency of Large Language Models through Model Editing: An Interpretability-Oriented Approach
Authors: Jingyuan Yang, Dapeng Chen, Yajing Sun, Rongjun Li, Zhiyong Feng, Wei Peng
Abstract: A Large Language Model (LLM) tends to generate inconsistent and sometimes contradictory outputs when presented with a prompt that has equivalent semantics but is expressed differently from the original prompt. One key approach to achieving semantic consistency is to finetune the model with prompt-output pairs of semantically equivalent meaning. Despite its effectiveness, such data-driven finetuning incurs substantial computation costs in data preparation and model optimization, and it treats the LLM as a "black box", restricting our ability to gain deeper insights into its internal mechanism. In this paper, we enhance the semantic consistency of LLMs through a more interpretable method, namely model editing. We first identify the model components (i.e., attention heads) that have a key impact on the semantic consistency of an LLM. We then inject biases into the output of these components along the semantic-consistency activation direction. Notably, these modifications are cost-effective and do not rely on mass manipulation of the original model parameters. Through comprehensive experiments on the constructed NLU and open-source NLG datasets, our method demonstrates significant improvements in the semantic consistency and task performance of LLMs. Additionally, it exhibits promising generalization by performing well on tasks beyond the primary ones.
Submitted 19 January, 2025; originally announced January 2025.
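Injecting a bias along an activation direction can be done without touching the weights, for instance with a PyTorch forward hook. The direction itself would come from the paper's head-identification step, which this sketch simply takes as given.

```python
import torch

def add_steering_hook(module, direction, scale):
    """Shift `module`'s output along a unit-normalized consistency
    direction. Works for modules whose forward returns a single tensor."""
    d = direction / direction.norm()
    def hook(_module, _inputs, output):
        # Returning a value from a forward hook replaces the module output.
        return output + scale * d.to(dtype=output.dtype, device=output.device)
    return module.register_forward_hook(hook)  # call .remove() to undo
```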
arXiv:2501.11036 (https://arxiv.org/abs/2501.11036) [cs.CL]
LF-Steering: Latent Feature Activation Steering for Enhancing Semantic Consistency in Large Language Models
Authors: Jingyuan Yang, Rongjun Li, Weixuan Wang, Ziyu Zhou, Zhiyong Feng, Wei Peng
Abstract: Large Language Models (LLMs) often generate inconsistent responses when prompted with semantically equivalent paraphrased inputs. Recently, activation steering, a technique that modulates LLMs' behaviour by adjusting their latent representations at inference time, has been explored as a way to improve the semantic consistency of LLMs. However, these methods typically operate at the model-component level, such as layer hidden states or attention-head outputs, and face a challenge due to the "polysemanticity issue": the model components of LLMs typically encode multiple entangled features, making precise steering difficult. To address this challenge, we drill down to feature-level representations and propose LF-Steering, a novel activation steering approach that precisely identifies the latent feature representations responsible for semantic inconsistency. More specifically, our method maps the hidden states of the relevant transformer layer into a sparsely activated, high-dimensional feature space using a sparse autoencoder (SAE), ensuring that model steering operates on decoupled feature representations with minimal interference. Comprehensive experiments on NLU and NLG datasets demonstrate the effectiveness of our method in enhancing semantic consistency, yielding significant performance gains across various NLU and NLG tasks.
Submitted 22 January, 2025; v1 submitted 19 January, 2025; originally announced January 2025.
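Feature-level steering through an SAE, stripped to its skeleton: the encoder, decoder, and the index of the identified feature are placeholders assumed to come from a trained SAE, and the ReLU encoding is one common SAE design rather than the paper's specific one.

```python
import torch

def sae_steer(hidden, encoder, decoder, feature_idx, delta):
    """Encode hidden states into the sparse feature space, nudge one
    (approximately monosemantic) feature, and decode back."""
    feats = torch.relu(encoder(hidden))   # sparse, high-dimensional activations
    feats[..., feature_idx] += delta      # steer only the targeted feature
    return decoder(feats)                 # back to the residual-stream space
```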
arXiv:2501.09426 (https://arxiv.org/abs/2501.09426) [cs.CL]
AutoCBT: An Autonomous Multi-agent Framework for Cognitive Behavioral Therapy in Psychological Counseling
Authors: Ancheng Xu, Di Yang, Renhao Li, Jingwei Zhu, Minghuan Tan, Min Yang, Wanxin Qiu, Mingchen Ma, Haihong Wu, Bingyu Li, Feng Sha, Chengming Li, Xiping Hu, Qiang Qu, Derek F. Wong, Ruifeng Xu
Abstract: Traditional in-person psychological counseling remains a niche service, often chosen only by individuals with psychological issues, while online automated counseling offers a potential solution for those hesitant to seek help due to feelings of shame. Cognitive Behavioral Therapy (CBT) is an essential and widely used approach in psychological counseling. The advent of large language models (LLMs) and agent technology enables automatic CBT diagnosis and treatment. However, current LLM-based CBT systems use agents with a fixed structure, which limits their self-optimization capabilities, or provide hollow, unhelpful suggestions due to redundant response patterns. In this work, we utilize Quora-like and YiXinLi single-round consultation models to build a general agent framework that generates high-quality responses for single-turn psychological consultation scenarios. We use a bilingual dataset to evaluate the quality of single-response consultations generated by each framework. We then incorporate dynamic routing and supervisory mechanisms inspired by real psychological counseling to construct a CBT-oriented autonomous multi-agent framework, demonstrating its general applicability. Experimental results indicate that AutoCBT can provide higher-quality automated psychological counseling services.
Submitted 16 January, 2025; originally announced January 2025.
arXiv:2501.08737 (https://arxiv.org/abs/2501.08737) [cs.LG]
Resource-Constrained Federated Continual Learning: What Does Matter?
Authors: Yichen Li, Yuying Wang, Jiahua Dong, Haozhao Wang, Yining Qi, Rui Zhang, Ruixuan Li
Abstract: Federated Continual Learning (FCL) aims to enable sequential, privacy-preserving model training on streams of incoming data that vary across edge devices, preserving previous knowledge while adapting to new data. The current FCL literature focuses on restricted data privacy and access to previously seen data while imposing no constraints on the training overhead. This is unreasonable for FCL applications in real-world scenarios, where edge devices are primarily constrained by resources such as storage, computational budget, and label rate. We revisit this problem with a large-scale benchmark and analyze the performance of state-of-the-art FCL approaches under different resource-constrained settings. Various typical FCL techniques and six datasets in two incremental learning scenarios (Class-IL and Domain-IL) are involved in our experiments. Through extensive experiments amounting to over 1,000 GPU hours, we find that, under limited resource-constrained settings, existing FCL approaches fail without exception to achieve the expected performance. Our conclusions are consistent in the sensitivity analysis. This suggests that most existing FCL methods are too resource-dependent for real-world deployment. Moreover, we study the performance of typical FCL techniques under resource constraints and shed light on future research directions in FCL.
Submitted 15 January, 2025; originally announced January 2025.
Comments: arXiv admin note: text overlap with arXiv:2303.11165 by other authors
arXiv:2501.08667 (https://arxiv.org/abs/2501.08667) [eess.IV, cs.CV]
TimeFlow: Longitudinal Brain Image Registration and Aging Progression Analysis
Authors: Bailiang Jian, Jiazhen Pan, Yitong Li, Fabian Bongratz, Ruochen Li, Daniel Rueckert, Benedikt Wiestler, Christian Wachinger
Abstract: Predicting future brain states is crucial for understanding healthy aging and neurodegenerative diseases. Longitudinal brain MRI registration, a cornerstone of such analyses, has long been limited by its inability to forecast future developments, its reliance on extensive, dense longitudinal data, and the need to balance registration accuracy with temporal smoothness. In this work, we present TimeFlow, a novel framework for longitudinal brain MRI registration that overcomes all these challenges. Leveraging a U-Net architecture with temporal conditioning inspired by diffusion models, TimeFlow enables accurate longitudinal registration and facilitates prospective analyses through future image prediction. Unlike traditional methods that depend on explicit smoothness regularizers and dense sequential data, TimeFlow achieves temporal consistency and continuity without these constraints. Experimental results highlight its superior performance in both future timepoint prediction and registration accuracy compared to state-of-the-art methods. Additionally, TimeFlow supports novel biological brain-aging analyses, effectively differentiating neurodegenerative conditions from healthy aging. It eliminates the need for segmentation, thereby avoiding the challenges of non-trivial annotation and inconsistent segmentation errors. TimeFlow paves the way for accurate, data-efficient, and annotation-free prospective analyses of brain aging and chronic diseases.
Submitted 15 January, 2025; originally announced January 2025.
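Diffusion-style temporal conditioning of a U-Net block is commonly implemented as FiLM modulation driven by the conditioning scalar, here the inter-scan interval. The sketch below is an illustrative guess at that mechanism, not TimeFlow's published architecture.

```python
import torch
import torch.nn as nn

class TimeConditioning(nn.Module):
    """Map a normalized time gap t to per-channel scale/shift (FiLM)."""
    def __init__(self, channels, hidden=64):
        super().__init__()
        self.mlp = nn.Sequential(nn.Linear(1, hidden), nn.SiLU(),
                                 nn.Linear(hidden, 2 * channels))

    def forward(self, feat, t):            # feat: (B, C, H, W); t: (B,)
        scale, shift = self.mlp(t[:, None]).chunk(2, dim=-1)
        return feat * (1 + scale[..., None, None]) + shift[..., None, None]
```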
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.07296">arXiv:2501.07296</a> <span> [<a href="https://arxiv.org/pdf/2501.07296">pdf</a>, <a href="https://arxiv.org/format/2501.07296">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Event-based Video Person Re-identification via Cross-Modality and Temporal Collaboration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+R">Renkai Li</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+X">Xin Yuan</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wei Liu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+X">Xin Xu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Video-based person re-identification (ReID) has become increasingly important due to its applications in video surveillance. By employing events in video-based person ReID, more motion information can be provided between continuous frames to improve recognition accuracy. Previous approaches have introduced event data to assist the video person ReID task, but they still cannot avoid the privacy leakage caused by RGB images. To avoid privacy attacks while retaining the benefits of event data, we consider using only event data. To make full use of the information in the event stream, we propose a Cross-Modality and Temporal Collaboration (CMTC) network for event-based video person ReID. First, we design an event transform network to obtain corresponding auxiliary information from the input of raw events. Additionally, we propose a differential modality collaboration module to balance the roles of events and auxiliaries to achieve complementary effects. Furthermore, we introduce a temporal collaboration module to exploit motion information and appearance cues. Experimental results demonstrate that our method outperforms others in the task of event-based video person ReID. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ICASSP 2025</span> </p>
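<p>To make the input side concrete: a raw event stream is a list of (x, y, polarity, timestamp) tuples, and a common preprocessing step, which an "event transform network" could consume, bins it into frame-like tensors. The shapes and binning scheme below are illustrative assumptions, not the CMTC implementation.</p> <pre><code>import numpy as np

# Bin a raw event stream into `bins` signed event-count frames.
def events_to_frames(events, h, w, bins):
    t0, t1 = events[:, 3].min(), events[:, 3].max()
    frames = np.zeros((bins, h, w))
    idx = ((events[:, 3] - t0) / max(t1 - t0, 1e-9) * (bins - 1)).astype(int)
    for (x, y, p, _), b in zip(events, idx):
        frames[b, int(y), int(x)] += 1 if p > 0 else -1  # signed event count
    return frames

rng = np.random.default_rng(0)
ev = np.stack([rng.integers(0, 32, 500),        # x
               rng.integers(0, 48, 500),        # y
               rng.choice([-1, 1], 500),        # polarity
               np.sort(rng.random(500))],       # timestamp
              axis=1)
print(events_to_frames(ev, h=48, w=32, bins=4).shape)   # (4, 48, 32)</code></pre>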
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.05767">arXiv:2501.05767</a> <span> [<a href="https://arxiv.org/pdf/2501.05767">pdf</a>, <a href="https://arxiv.org/format/2501.05767">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Migician: Revealing the Magic of Free-Form Multi-Image Grounding in Multimodal Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+Y">You Li</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+H">Heyu Huang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+C">Chi Chen</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+K">Kaiyu Huang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+C">Chao Huang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zonghao Guo</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zhiyuan Liu</a>, <a href="/search/cs?searchtype=author&query=Xu%2C+J">Jinan Xu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Y">Yuhua Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Ruixuan Li</a>, <a href="/search/cs?searchtype=author&query=Sun%2C+M">Maosong Sun</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: The recent advancement of Multimodal Large Language Models (MLLMs) has significantly improved their fine-grained perception of single images and general comprehension across multiple images. However, existing MLLMs still face challenges in achieving precise grounding in complex multi-image scenarios. To address this, we first explore a Chain-of-Thought (CoT) framework that integrates single-image grounding with multi-image comprehension. While partially effective, it remains unstable and struggles to capture abstract visual information due to its non-end-to-end nature. Therefore, we introduce Migician, the first multi-image grounding model capable of performing free-form and accurate grounding across multiple images. To support this, we present the MGrounding-630k dataset, which comprises data for several multi-image grounding tasks derived from existing datasets, along with newly generated free-form grounding instruction-following data. Furthermore, we propose MIG-Bench, a comprehensive benchmark specifically designed for evaluating multi-image grounding capabilities. Experimental results demonstrate that our model achieves significantly superior multi-image grounding capabilities, outperforming the best existing MLLMs by 21.61% and even surpassing much larger 70B models. Our code, model, dataset, and benchmark are fully open-sourced at https://migician-vg.github.io/. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 10 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">20 pages, 8 figures</span> </p>
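<p>A hedged sketch of the non-end-to-end CoT baseline the abstract contrasts against: first describe the query object using multi-image comprehension, then hand that textual description to a single-image grounding call. Both model functions are stubs of our own invention, not Migician's API; the text bottleneck between the two steps is exactly why such a pipeline loses abstract visual information.</p> <pre><code># Step 1: multi-image comprehension produces a textual description (stub).
def caption_region(image_a, query):
    return f"the {query} visible in image A"

# Step 2: single-image grounding localizes that description (stub output).
def ground(image_b, description):
    return {"bbox": [40, 60, 120, 180], "phrase": description}

def cot_multi_image_ground(image_a, image_b, query):
    description = caption_region(image_a, query)   # information bottleneck
    return ground(image_b, description)

print(cot_multi_image_ground("imgA.jpg", "imgB.jpg", "red backpack"))</code></pre>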
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.05310">arXiv:2501.05310</a> <span> [<a href="https://arxiv.org/pdf/2501.05310">pdf</a>, <a href="https://arxiv.org/format/2501.05310">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> </div> </div> <p class="title is-5 mathjax"> Probing Speaker-specific Features in Speaker Representations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chiu%2C+A+Y+F">Aemon Yat Fei Chiu</a>, <a href="/search/cs?searchtype=author&query=Fung%2C+P+K+C">Paco Kei Ching Fung</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R+T+Y">Roger Tsz Yeung Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Jingyu Li</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+T">Tan Lee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: This study explores speaker-specific features encoded in speaker embeddings and intermediate layers of speech self-supervised learning (SSL) models. By utilising a probing method, we analyse features such as pitch, tempo, and energy across prominent speaker embedding models and speech SSL models, including HuBERT, WavLM, and Wav2vec 2.0. The results reveal that speaker embeddings like CAM++ excel in energy classification, while speech SSL models demonstrate superior performance across multiple features due to their hierarchical feature encoding. Intermediate layers effectively capture a mix of acoustic and para-linguistic information, with deeper layers refining these representations. This investigation provides insights into model design and highlights the potential of these representations for downstream applications, such as speaker verification and text-to-speech synthesis, while laying the groundwork for exploring additional features and advanced probing methods. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
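<p>The probing method itself is simple to illustrate: freeze a layer's activations and fit a shallow classifier on top; high probe accuracy suggests the layer linearly encodes the feature. Below is a minimal, runnable sketch on synthetic activations (a stand-in for real HuBERT/WavLM layer outputs, which would require loading those models).</p> <pre><code>import numpy as np

rng = np.random.default_rng(0)
acts = rng.normal(size=(1000, 64))               # stand-in layer activations
true_dir = rng.normal(size=64)
labels = (acts @ true_dir > 0).astype(float)     # synthetic "high energy" label

# Linear probe via least squares on +/-1 targets; the representation stays frozen.
w, *_ = np.linalg.lstsq(acts, labels * 2 - 1, rcond=None)
acc = ((acts @ w > 0) == labels.astype(bool)).mean()
print(f"probe accuracy: {acc:.2f}")              # high accuracy = feature is decodable</code></pre>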
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.04963">arXiv:2501.04963</a> <span> [<a href="https://arxiv.org/pdf/2501.04963">pdf</a>, <a href="https://arxiv.org/format/2501.04963">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Shelving it rather than Ditching it: Dynamically Debloating DEX and Native Methods of Android Applications without APK Modification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zicheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiakun Liu</a>, <a href="/search/cs?searchtype=author&query=Thung%2C+F">Ferdian Thung</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+H">Haoyu Ma</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rui Li</a>, <a href="/search/cs?searchtype=author&query=Tun%2C+Y+N">Yan Naing Tun</a>, <a href="/search/cs?searchtype=author&query=Minn%2C+W">Wei Minn</a>, <a href="/search/cs?searchtype=author&query=Shar%2C+L+K">Lwin Khin Shar</a>, <a href="/search/cs?searchtype=author&query=Maoz%2C+S">Shahar Maoz</a>, <a href="/search/cs?searchtype=author&query=Toch%2C+E">Eran Toch</a>, <a href="/search/cs?searchtype=author&query=Lo%2C+D">David Lo</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+J">Joshua Wong</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+D">Debin Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Today's Android developers tend to include numerous features to accommodate diverse user requirements, which inevitably leads to bloated apps. Yet more often than not, only a fraction of these features are frequently used, so a bloated app costs dearly in potential vulnerabilities, expanded attack surfaces, and additional resource consumption. Especially in the event of severe security incidents, users need to block vulnerable functionalities immediately. Existing works have proposed various code debloating approaches for identifying and removing features of executable components. However, they typically involve static modification of files (and, for Android apps, repackaging of APKs), which is inconvenient for users and, worse, undermines Android's security model by compromising public key verification and code integrity checks. This paper introduces 3DNDroid, a Dynamic Debloating approach targeting both DEX and Native methods in AnDroid apps. Using an unprivileged management app in tandem with a customized Android OS, 3DNDroid dynamically reduces unnecessary code loading during app execution based on a pre-generated debloating schema derived from static or dynamic analyses. It intercepts invocations of debloated bytecode methods to prevent their interpretation, compilation, and execution, and zero-fills the memory spaces of debloated native methods during code loading. Evaluation demonstrates 3DNDroid's ability to debloat 187 DEX methods and 30 native methods across 55 real-world apps, removing over 10K Return-Oriented Programming (ROP) gadgets. Case studies confirm its effectiveness in mitigating vulnerabilities, and performance assessments highlight its resource-saving advantages over non-debloated apps. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
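<p>To make the "pre-generated debloating schema" concept tangible, here is a purely illustrative data shape and lookup in Python. Every field name and method signature below is invented; the real system operates inside a customized Android OS, which a sketch like this cannot reproduce.</p> <pre><code>import json

# Invented schema shape: which DEX methods to shelve and which native
# symbols to zero-fill at code-loading time for one app.
schema = json.loads("""
{
  "app": "com.example.bloated",
  "shelve_dex": ["Lcom/example/Ads;->load()V"],
  "zero_fill_native": ["libtracker.so:report_usage"]
}
""")

def should_shelve(method_sig, schema):
    # A management component would consult this on each method-load event.
    return method_sig in schema["shelve_dex"]

print(should_shelve("Lcom/example/Ads;->load()V", schema))   # True</code></pre>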
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.05310v1-abstract-full').style.display = 'none'; document.getElementById('2501.05310v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.04963">arXiv:2501.04963</a> <span> [<a href="https://arxiv.org/pdf/2501.04963">pdf</a>, <a href="https://arxiv.org/format/2501.04963">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Shelving it rather than Ditching it: Dynamically Debloating DEX and Native Methods of Android Applications without APK Modification </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+Z">Zicheng Zhang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+J">Jiakun Liu</a>, <a href="/search/cs?searchtype=author&query=Thung%2C+F">Ferdian Thung</a>, <a href="/search/cs?searchtype=author&query=Ma%2C+H">Haoyu Ma</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rui Li</a>, <a href="/search/cs?searchtype=author&query=Tun%2C+Y+N">Yan Naing Tun</a>, <a href="/search/cs?searchtype=author&query=Minn%2C+W">Wei Minn</a>, <a href="/search/cs?searchtype=author&query=Shar%2C+L+K">Lwin Khin Shar</a>, <a href="/search/cs?searchtype=author&query=Maoz%2C+S">Shahar Maoz</a>, <a href="/search/cs?searchtype=author&query=Toch%2C+E">Eran Toch</a>, <a href="/search/cs?searchtype=author&query=Lo%2C+D">David Lo</a>, <a href="/search/cs?searchtype=author&query=Wong%2C+J">Joshua Wong</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+D">Debin Gao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.04963v1-abstract-short" style="display: inline;"> Today's Android developers tend to include numerous features to accommodate diverse user requirements, which inevitably leads to bloated apps. Yet more often than not, only a fraction of these features are frequently utilized by users, thus a bloated app costs dearly in potential vulnerabilities, expanded attack surfaces, and additional resource consumption. Especially in the event of severe secur… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.04963v1-abstract-full').style.display = 'inline'; document.getElementById('2501.04963v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.04963v1-abstract-full" style="display: none;"> Today's Android developers tend to include numerous features to accommodate diverse user requirements, which inevitably leads to bloated apps. Yet more often than not, only a fraction of these features are frequently utilized by users, thus a bloated app costs dearly in potential vulnerabilities, expanded attack surfaces, and additional resource consumption. Especially in the event of severe security incidents, users have the need to block vulnerable functionalities immediately. 
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.04353">arXiv:2501.04353</a> <span> [<a href="https://arxiv.org/pdf/2501.04353">pdf</a>, <a href="https://arxiv.org/format/2501.04353">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DeFusion: An Effective Decoupling Fusion Network for Multi-Modal Pregnancy Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ouyang%2C+X">Xueqiang Ouyang</a>, <a href="/search/cs?searchtype=author&query=Wei%2C+J">Jia Wei</a>, <a href="/search/cs?searchtype=author&query=Huo%2C+W">Wenjie Huo</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+X">Xiaocong Wang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rui Li</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+J">Jianlong Zhou</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Temporal embryo images and parental fertility table indicators are both valuable for pregnancy prediction in <strong>in vitro fertilization embryo transfer</strong> (IVF-ET). However, current machine learning models cannot make full use of the complementary information between the two modalities to improve pregnancy prediction performance. In this paper, we propose a Decoupling Fusion Network called DeFusion to effectively integrate the multi-modal information for IVF-ET pregnancy prediction. Specifically, we propose a decoupling fusion module that decouples the information from the different modalities into related and unrelated information, thereby achieving a more delicate fusion. We fuse temporal embryo images with a spatial-temporal position encoding and extract fertility table indicator information with a table transformer. To evaluate the effectiveness of our model, we use a new dataset including 4046 cases collected from Southern Medical University. The experiments show that our model outperforms state-of-the-art methods. Meanwhile, the performance on an eye disease prediction dataset reflects the model's good generalization. Our code and dataset are available at https://github.com/Ou-Young-1999/DFNet. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
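<p>A toy reading of the decoupling idea, under our own assumptions rather than the authors' code: split each modality's features into a component explained by the other modality ("related") and an orthogonal remainder ("unrelated"), then fuse the related parts while keeping the unrelated parts as modality-specific cues.</p> <pre><code>import numpy as np

# Project `a` onto `b`: the projection is the "related" part, the orthogonal
# remainder is the "unrelated" part.
def decouple(a, b):
    proj = (a @ b) / (b @ b) * b
    return proj, a - proj

img_feat = np.array([1.0, 2.0, 3.0, 4.0])       # embryo-image features (toy)
tab_feat = np.array([0.5, 1.0, -1.5, 2.0])      # fertility-table features (toy)

img_rel, img_unrel = decouple(img_feat, tab_feat)
tab_rel, tab_unrel = decouple(tab_feat, img_feat)
fused = np.concatenate([(img_rel + tab_rel) / 2, img_unrel, tab_unrel])
print(fused.round(3))</code></pre>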
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.04285">arXiv:2501.04285</a> <span> [<a href="https://arxiv.org/pdf/2501.04285">pdf</a>, <a href="https://arxiv.org/format/2501.04285">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Theory">cs.IT</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Signal Processing">eess.SP</span> </div> </div> <p class="title is-5 mathjax"> Separate Source Channel Coding Is Still What You Need: An LLM-based Rethinking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Ren%2C+T">Tianqi Ren</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rongpeng Li</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+M">Ming-min Zhao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xianfu Chen</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+G">Guangyi Liu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+Y">Yang Yang</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhifeng Zhao</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Honggang Zhang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Along with the proliferating research interest in Semantic Communication (SemCom), Joint Source Channel Coding (JSCC) has dominated attention due to its widely assumed efficiency in delivering information semantics. Nevertheless, this paper challenges the conventional JSCC paradigm and advocates the adoption of Separate Source Channel Coding (SSCC) to exploit its greater degrees of freedom for optimization. We demonstrate that SSCC, leveraging the strengths of a Large Language Model (LLM) for source coding complemented by an Error Correction Code Transformer (ECCT) for channel decoding, offers performance superior to JSCC. Our proposed framework also effectively highlights the compatibility challenges between SemCom approaches and digital communication systems, particularly concerning the resource costs associated with the transmission of high-precision floating point numbers. Through comprehensive evaluations, we establish that, empowered by LLM-based compression and ECCT-enhanced error correction, SSCC remains a viable and effective solution for modern communication systems. In other words, separate source and channel coding is still what we need! </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
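<p>The separation principle is easy to demonstrate end to end. In this runnable sketch, zlib stands in for the LLM-based source coder and a trivial 5x repetition code stands in for ECCT-assisted channel coding; both substitutions are ours, chosen only to keep the example self-contained.</p> <pre><code>import zlib, random

def channel_encode(bits):
    return [bit for b in bits for bit in (b,) * 5]        # 5x repetition code

def channel_decode(bits):
    return [int(sum(bits[i:i + 5]) >= 3)                  # majority vote
            for i in range(0, len(bits), 5)]

def to_bits(data):
    return [b >> i & 1 for b in data for i in range(8)]

def to_bytes(bits):
    return bytes(sum(bit << i for i, bit in enumerate(bits[k:k + 8]))
                 for k in range(0, len(bits), 8))

message = b"separate source and channel coding is still what we need " * 4
payload = zlib.compress(message)               # source coding (LLM stand-in)
random.seed(7)
received = [b ^ (random.random() < 0.01)       # binary symmetric channel, p=0.01
            for b in channel_encode(to_bits(payload))]
recovered = to_bytes(channel_decode(received))
# True with overwhelming probability; then zlib.decompress(recovered) == message.
print(recovered == payload)</code></pre>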
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.03229">arXiv:2501.03229</a> <span> [<a href="https://arxiv.org/pdf/2501.03229">pdf</a>, <a href="https://arxiv.org/format/2501.03229">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Gaussian Masked Autoencoders </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Rajasegaran%2C+J">Jathushan Rajasegaran</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+X">Xinlei Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rulilong Li</a>, <a href="/search/cs?searchtype=author&query=Feichtenhofer%2C+C">Christoph Feichtenhofer</a>, <a href="/search/cs?searchtype=author&query=Malik%2C+J">Jitendra Malik</a>, <a href="/search/cs?searchtype=author&query=Ginosar%2C+S">Shiry Ginosar</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: This paper explores Masked Autoencoders (MAE) with Gaussian Splatting. While reconstructive self-supervised learning frameworks such as MAE learn good semantic abstractions, they are not trained for explicit spatial awareness. Our approach, named Gaussian Masked Autoencoder, or GMAE, aims to learn semantic abstractions and spatial understanding jointly. Like MAE, it reconstructs the image end-to-end in the pixel space, but beyond MAE, it also introduces an intermediate, 3D Gaussian-based representation and renders images via splatting. We show that GMAE can enable various zero-shot learning capabilities of spatial understanding (e.g., figure-ground segmentation, image layering, edge detection, etc.) while preserving the high-level semantics of self-supervised representation quality from MAE. To our knowledge, we are the first to employ Gaussian primitives in an image representation learning framework beyond optimization-based single-scene reconstructions. We believe GMAE will inspire further research in this direction and contribute to developing next-generation techniques for modeling high-fidelity visual data. More details at https://brjathu.github.io/gmae </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
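<p>The rendering half of that pipeline is easy to sketch: the decoder's output is a set of Gaussians that are splatted back to pixels. This toy renders isotropic 2D Gaussians; predicting them from masked patch tokens, and the 3D parameterization, are omitted, so treat this as an illustration of splatting only.</p> <pre><code>import numpy as np

# Accumulate isotropic 2D Gaussians onto an h x w canvas.
def splat(gaussians, h, w):
    ys, xs = np.mgrid[0:h, 0:w]
    img = np.zeros((h, w))
    for cy, cx, sigma, amp in gaussians:
        img += amp * np.exp(-((ys - cy) ** 2 + (xs - cx) ** 2)
                            / (2 * sigma ** 2))
    return img

# Stand-in for decoder predictions: (center_y, center_x, sigma, amplitude).
preds = [(16, 16, 4.0, 1.0), (40, 30, 8.0, 0.5)]
print(splat(preds, 64, 64).max().round(3))</code></pre>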
</li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02728">arXiv:2501.02728</a> <span> [<a href="https://arxiv.org/pdf/2501.02728">pdf</a>, <a href="https://arxiv.org/format/2501.02728">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> OpenGU: A Comprehensive Benchmark for Graph Unlearning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fan%2C+B">Bowen Fan</a>, <a href="/search/cs?searchtype=author&query=Ai%2C+Y">Yuming Ai</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xunkai Li</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+Z">Zhilin Guo</a>, <a href="/search/cs?searchtype=author&query=Li%2C+R">Rong-Hua Li</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+G">Guoren Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Graph Machine Learning is essential for understanding and analyzing relational data. However, privacy-sensitive applications demand the ability to efficiently remove sensitive information from trained graph neural networks (GNNs), avoiding the unnecessary time and space overhead caused by retraining models from scratch. To address this issue, Graph Unlearning (GU) has emerged as a critical solution, with the potential to support dynamic graph updates in data management systems and enable scalable unlearning in distributed data systems while ensuring privacy compliance. Unlike machine unlearning in computer vision or other fields, GU faces unique difficulties due to the non-Euclidean nature of graph data and the recursive message-passing mechanism of GNNs. Additionally, the diversity of downstream tasks and the complexity of unlearning requests further amplify these challenges. Despite the proliferation of diverse GU strategies, the absence of a benchmark providing fair comparisons for GU, and the limited flexibility in combining downstream tasks and unlearning requests, have yielded inconsistencies in evaluations, hindering the development of this domain. To fill this gap, we present OpenGU, the first GU benchmark, where 16 SOTA GU algorithms and 37 multi-domain datasets are integrated, enabling various downstream tasks with 13 GNN backbones when responding to flexible unlearning requests. Based on this unified benchmark framework, we are able to provide a comprehensive and fair evaluation for GU. Through extensive experimentation, we have drawn 8 crucial conclusions about existing GU methods, while also gaining valuable insights into their limitations, shedding light on potential avenues for future research. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">under review</span> </p>
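<p>A benchmark of this kind is, structurally, a sweep over algorithm x backbone x unlearning-request combinations. The sketch below shows that organization with a stub evaluator; the algorithm and backbone names are common examples from the GU literature, and nothing here is OpenGU's actual API.</p> <pre><code>from itertools import product

algorithms = ["GraphEraser", "GIF", "Retrain"]     # example GU strategies
backbones  = ["GCN", "GAT", "GraphSAGE"]           # example GNN backbones
requests   = ["node", "edge", "feature"]           # example unlearning requests

def evaluate(algo, backbone, request):
    # A real run would train, unlearn, and measure utility + unlearning time.
    return {"algo": algo, "backbone": backbone, "request": request,
            "acc": None, "unlearn_time_s": None}

results = [evaluate(a, b, r)
           for a, b, r in product(algorithms, backbones, requests)]
print(len(results), "configurations")              # 27</code></pre>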
</li>
</ol> </div> </main> </body> </html>