Search | arXiv e-print repository

<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;50 of 517 results for author: <span class="mathjax">Zeng, Z</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Zeng%2C+Z">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Zeng, Z"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Zeng%2C+Z&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Zeng, Z"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <nav class="pagination is-small is-centered breathe-horizontal" role="navigation" aria-label="pagination"> <a href="" class="pagination-previous is-invisible">Previous </a> <a href="/search/?searchtype=author&amp;query=Zeng%2C+Z&amp;start=50" class="pagination-next" >Next </a> <ul class="pagination-list"> <li> <a href="/search/?searchtype=author&amp;query=Zeng%2C+Z&amp;start=0" class="pagination-link is-current" aria-label="Goto page 1">1 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zeng%2C+Z&amp;start=50" class="pagination-link " aria-label="Page 2" aria-current="page">2 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zeng%2C+Z&amp;start=100" class="pagination-link " aria-label="Page 3" aria-current="page">3 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zeng%2C+Z&amp;start=150" class="pagination-link " aria-label="Page 4" aria-current="page">4 </a> </li> <li> <a href="/search/?searchtype=author&amp;query=Zeng%2C+Z&amp;start=200" class="pagination-link " aria-label="Page 5" aria-current="page">5 </a> </li> <li><span class="pagination-ellipsis">&hellip;</span></li> </ul> </nav> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.19334">arXiv:2502.19334</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.19334">pdf</a>, <a href="https://arxiv.org/format/2502.19334">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> </div> </div> <p class="title is-5 mathjax"> Joint Optimal Transport and Embedding for Network Alignment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Q">Qi Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhichen Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+Y">Yuchen Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Ying%2C+L">Lei Ying</a>, <a href="/search/cs?searchtype=author&amp;query=Srikant%2C+R">R. Srikant</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+H">Hanghang Tong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.19334v1-abstract-short" style="display: inline;"> Network alignment, which aims to find node correspondence across different networks, is the cornerstone of various downstream multi-network and Web mining tasks. 
Abstract: Network alignment, which aims to find node correspondence across different networks, is the cornerstone of various downstream multi-network and Web mining tasks. Most of the embedding-based methods indirectly model cross-network node relationships by contrasting positive and negative node pairs sampled from hand-crafted strategies, which are vulnerable to graph noise and lead to potential misalignment of nodes. Another line of work based on optimal transport (OT) theory directly models cross-network node relationships and generates noise-reduced alignments. However, OT methods heavily rely on fixed, pre-defined cost functions that prohibit end-to-end training and are hard to generalize. In this paper, we aim to unify the embedding and OT-based methods in a mutually beneficial manner and propose a joint optimal transport and embedding framework for network alignment named JOENA. For one thing (OT for embedding), through a simple yet effective transformation, the noise-reduced OT mapping serves as an adaptive sampling strategy directly modeling all cross-network node pairs for robust embedding learning. For another (embedding for OT), on top of the learned embeddings, the OT cost can be gradually trained in an end-to-end fashion, which further enhances the alignment quality. With a unified objective, the mutual benefits of both methods can be achieved by an alternating optimization schema with guaranteed convergence. Extensive experiments on real-world networks validate the effectiveness and scalability of JOENA, achieving up to 16% improvement in MRR and a 20x speedup compared with state-of-the-art alignment methods.
Submitted 26 February, 2025; originally announced February 2025.
Comments: 12 pages, 7 figures

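To make the alternating schema concrete, here is a minimal sketch of one OT-then-embedding round under simple assumptions (uniform marginals, Euclidean cost on learned embeddings); all helper names are illustrative, not the authors' code:

```python
import torch

def sinkhorn(cost, eps=0.05, iters=100):
    """Entropic OT plan between uniform marginals via Sinkhorn iterations."""
    n, m = cost.shape
    K = torch.exp(-cost / eps)                 # Gibbs kernel
    u = torch.full((n,), 1.0 / n)
    v = torch.full((m,), 1.0 / m)
    for _ in range(iters):
        u = (1.0 / n) / (K @ v)
        v = (1.0 / m) / (K.T @ u)
    return torch.diag(u) @ K @ torch.diag(v)   # transport plan P

def alternating_step(emb1, emb2, optimizer):
    """OT step: plan from the current cost; embedding step: plan weights the loss."""
    cost = torch.cdist(emb1, emb2)             # differentiable, learned OT cost
    with torch.no_grad():
        P = sinkhorn(cost)                     # noise-reduced alignment plan
    loss = (P * cost).sum()                    # P weights *all* cross-network pairs
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()
```

In a full pipeline, emb1 and emb2 would be produced by trainable encoders, so the OT cost itself is trained end-to-end as the abstract describes.
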
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.19178">arXiv:2502.19178</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.19178">pdf</a>, <a href="https://arxiv.org/format/2502.19178">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> UQABench: Evaluating User Embedding for Prompting LLMs in Personalized Question Answering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+L">Langming Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Shilei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+Y">Yujin Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yizhen Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+B">Bencheng Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhiyuan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zihao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiaqi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Di Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+W">Wenbo Su</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+P">Pengjie Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jian Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+B">Bo Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.19178v1-abstract-short" style="display: inline;"> Large language models (LLMs) achieve remarkable success in natural language processing (NLP). In practical scenarios like recommendations, as users increasingly seek personalized experiences, it becomes crucial to incorporate user interaction history into the context of LLMs to enhance personalization. However, from a practical utility perspective, user interactions&#39; extensive length and noise pre&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.19178v1-abstract-full').style.display = 'inline'; document.getElementById('2502.19178v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.19178v1-abstract-full" style="display: none;"> Large language models (LLMs) achieve remarkable success in natural language processing (NLP). In practical scenarios like recommendations, as users increasingly seek personalized experiences, it becomes crucial to incorporate user interaction history into the context of LLMs to enhance personalization. However, from a practical utility perspective, user interactions&#39; extensive length and noise present challenges when used directly as text prompts. A promising solution is to compress and distill interactions into compact embeddings, serving as soft prompts to assist LLMs in generating personalized responses. Although this approach brings efficiency, a critical concern emerges: Can user embeddings adequately capture valuable information and prompt LLMs? 
Abstract: Large language models (LLMs) achieve remarkable success in natural language processing (NLP). In practical scenarios like recommendation, as users increasingly seek personalized experiences, it becomes crucial to incorporate user interaction history into the context of LLMs to enhance personalization. However, from a practical utility perspective, the extensive length and noise of user interactions present challenges when they are used directly as text prompts. A promising solution is to compress and distill interactions into compact embeddings that serve as soft prompts to assist LLMs in generating personalized responses. Although this approach brings efficiency, a critical concern emerges: can user embeddings adequately capture valuable information and prompt LLMs? To address this concern, we propose UQABench, a benchmark designed to evaluate the effectiveness of user embeddings in prompting LLMs for personalization. We establish a fair and standardized evaluation process encompassing pre-training, fine-tuning, and evaluation stages. To thoroughly evaluate user embeddings, we design three dimensions of tasks: sequence understanding, action prediction, and interest perception. These evaluation tasks cover the industry's demands in traditional recommendation tasks, such as improving prediction accuracy, and its aspirations for LLM-based methods, such as accurately understanding user interests and enhancing the user experience. We conduct extensive experiments on various state-of-the-art methods for modeling user embeddings. Additionally, we reveal the scaling laws of leveraging user embeddings to prompt LLMs. The benchmark is available online.
Submitted 26 February, 2025; originally announced February 2025.
Comments: 10 pages, 3 figures, 7 tables

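The soft-prompt pattern the abstract evaluates can be sketched as follows (hypothetical module and dimension choices; the benchmark itself defines the exact protocol):

```python
import torch
import torch.nn as nn

class UserSoftPrompt(nn.Module):
    """Compress an interaction history into a few soft-prompt vectors."""
    def __init__(self, item_vocab, d_model, n_prompt=8):
        super().__init__()
        self.item_emb = nn.Embedding(item_vocab, d_model)
        self.encoder = nn.TransformerEncoderLayer(d_model, nhead=8, batch_first=True)
        self.queries = nn.Parameter(torch.randn(n_prompt, d_model))  # learned slots

    def forward(self, item_ids):                   # (batch, history_len)
        h = self.encoder(self.item_emb(item_ids))  # encode interaction history
        q = self.queries.expand(item_ids.size(0), -1, -1)
        # cross-attention pooling into n_prompt vectors per user
        attn = torch.softmax(q @ h.transpose(1, 2) / h.size(-1) ** 0.5, dim=-1)
        return attn @ h                            # (batch, n_prompt, d_model)

# The returned vectors would be concatenated in front of the prompt's token
# embeddings before the LLM forward pass, e.g. (hypothetical embed_tokens):
#   inputs_embeds = torch.cat([soft_prompt, llm.embed_tokens(prompt_ids)], dim=1)
```
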
3. arXiv:2502.18817 [pdf, other] - cs.CL - https://arxiv.org/abs/2502.18817
Judge as A Judge: Improving the Evaluation of Retrieval-Augmented Generation through the Judge-Consistency of Large Language Models
Authors: Shuliang Liu, Xinze Li, Zhenghao Liu, Yukun Yan, Cheng Yang, Zheni Zeng, Zhiyuan Liu, Maosong Sun, Ge Yu
Abstract: Retrieval-Augmented Generation (RAG) has proven its effectiveness in alleviating hallucinations for Large Language Models (LLMs). However, existing automated evaluation metrics cannot fairly evaluate the outputs generated by RAG models during training and evaluation. LLM-based judgment models provide the potential to produce high-quality judgments, but they are highly sensitive to evaluation prompts, leading to inconsistencies when judging the output of RAG models. This paper introduces the Judge-Consistency (ConsJudge) method, which aims to enhance LLMs to generate more accurate evaluations for RAG models. Specifically, ConsJudge prompts LLMs to generate different judgments based on various combinations of judgment dimensions, utilizes judge-consistency to evaluate these judgments, and selects accepted and rejected judgments for DPO training. Our experiments show that ConsJudge can effectively provide more accurate judgments for optimizing RAG models across various RAG models and datasets. Further analysis reveals that judgments generated by ConsJudge have high agreement with the superior LLM. All code is available at https://github.com/OpenBMB/ConsJudge.
Submitted 25 February, 2025; originally announced February 2025.

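A minimal sketch of the judge-consistency selection step, assuming verdicts can be compared by simple agreement (the paper's dimension combinations and DPO setup are richer than this):

```python
from collections import Counter

def judge_consistency(judgments):
    """judgments: list of verdict labels, e.g. ['A', 'A', 'B', 'A']."""
    counts = Counter(judgments)
    # each judgment's consistency = fraction of the other judgments that agree
    return [(counts[j] - 1) / (len(judgments) - 1) for j in judgments]

def build_dpo_pair(judgments):
    """Keep the most/least consistent judgments as a chosen/rejected pair."""
    scores = judge_consistency(judgments)
    chosen = judgments[max(range(len(scores)), key=scores.__getitem__)]
    rejected = judgments[min(range(len(scores)), key=scores.__getitem__)]
    return chosen, rejected   # fed to DPO training as a preference pair

# Example: four judgments sampled under different dimension combinations
print(build_dpo_pair(['A', 'A', 'B', 'A']))  # ('A', 'B')
```
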
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.18517">arXiv:2502.18517</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.18517">pdf</a>, <a href="https://arxiv.org/format/2502.18517">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> RewardDS: Privacy-Preserving Fine-Tuning for Large Language Models via Reward Driven Data Synthesis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jianwei Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Junyao Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Haoran Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhuang%2C+H">Huiping Zhuang</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+C">Cen Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Ziqian Zeng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.18517v1-abstract-short" style="display: inline;"> The success of large language models (LLMs) has attracted many individuals to fine-tune them for domain-specific tasks by uploading their data. However, in sensitive areas like healthcare and finance, privacy concerns often arise. One promising solution is to sample synthetic data with Differential Privacy (DP) guarantees to replace private data. However, these synthetic data contain significant f&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.18517v1-abstract-full').style.display = 'inline'; document.getElementById('2502.18517v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.18517v1-abstract-full" style="display: none;"> The success of large language models (LLMs) has attracted many individuals to fine-tune them for domain-specific tasks by uploading their data. However, in sensitive areas like healthcare and finance, privacy concerns often arise. One promising solution is to sample synthetic data with Differential Privacy (DP) guarantees to replace private data. However, these synthetic data contain significant flawed data, which are considered as noise. Existing solutions typically rely on naive filtering by comparing ROUGE-L scores or embedding similarities, which are ineffective in addressing the noise. To address this issue, we propose RewardDS, a novel privacy-preserving framework that fine-tunes a reward proxy model and uses reward signals to guide the synthetic data generation. Our RewardDS introduces two key modules, Reward Guided Filtering and Self-Optimizing Refinement, to both filter and refine the synthetic data, effectively mitigating the noise. Extensive experiments across medical, financial, and code generation domains demonstrate the effectiveness of our method. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.18517v1-abstract-full').style.display = 'none'; document.getElementById('2502.18517v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.17888">arXiv:2502.17888</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.17888">pdf</a>, <a href="https://arxiv.org/format/2502.17888">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> RankCoT: Refining Knowledge for Retrieval-Augmented Generation through Ranking Chain-of-Thoughts </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Wu%2C+M">Mingyan Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhenghao Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Yan%2C+Y">Yukun Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xinze Li</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+S">Shi Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zheni Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+Y">Yu Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+G">Ge Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.17888v1-abstract-short" style="display: inline;"> Retrieval-Augmented Generation (RAG) enhances the performance of Large Language Models (LLMs) by incorporating external knowledge. However, LLMs still encounter challenges in effectively utilizing the knowledge from retrieved documents, often being misled by irrelevant or noisy information. To address this issue, we introduce RankCoT, a knowledge refinement method that incorporates reranking signa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.17888v1-abstract-full').style.display = 'inline'; document.getElementById('2502.17888v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.17888v1-abstract-full" style="display: none;"> Retrieval-Augmented Generation (RAG) enhances the performance of Large Language Models (LLMs) by incorporating external knowledge. However, LLMs still encounter challenges in effectively utilizing the knowledge from retrieved documents, often being misled by irrelevant or noisy information. To address this issue, we introduce RankCoT, a knowledge refinement method that incorporates reranking signals in generating CoT-based summarization for knowledge refinement based on given query and all retrieval documents. During training, RankCoT prompts the LLM to generate Chain-of-Thought (CoT) candidates based on the query and individual documents. 
5. arXiv:2502.17888 [pdf, other] - cs.CL - https://arxiv.org/abs/2502.17888
RankCoT: Refining Knowledge for Retrieval-Augmented Generation through Ranking Chain-of-Thoughts
Authors: Mingyan Wu, Zhenghao Liu, Yukun Yan, Xinze Li, Shi Yu, Zheni Zeng, Yu Gu, Ge Yu
Abstract: Retrieval-Augmented Generation (RAG) enhances the performance of Large Language Models (LLMs) by incorporating external knowledge. However, LLMs still encounter challenges in effectively utilizing the knowledge from retrieved documents, often being misled by irrelevant or noisy information. To address this issue, we introduce RankCoT, a knowledge refinement method that incorporates reranking signals when generating CoT-based summarization over the given query and all retrieved documents. During training, RankCoT prompts the LLM to generate Chain-of-Thought (CoT) candidates based on the query and individual documents. It then fine-tunes the LLM to directly reproduce the best CoT from these candidates based on all retrieved documents, which requires the LLM to filter out irrelevant documents while generating the CoT-style summarization. Additionally, RankCoT incorporates a self-reflection mechanism that further refines the CoT outputs, resulting in higher-quality training data. Our experiments demonstrate the effectiveness of RankCoT, showing its superior performance over other knowledge refinement models. Further analysis reveals that RankCoT can provide shorter yet effective refinement results, enabling the generator to produce more accurate answers. All code and data are available at https://github.com/NEUIR/RankCoT.
Submitted 25 February, 2025; originally announced February 2025.

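A hedged sketch of the training-data construction described above (llm.generate_cot and reranker.score are hypothetical helpers; the linked repo is the authoritative pipeline):

```python
def build_rankcot_example(llm, reranker, query, documents):
    # 1) one CoT candidate per individual document
    candidates = [llm.generate_cot(query, doc) for doc in documents]
    # 2) a reranking signal scores each candidate against the query
    best = max(candidates, key=lambda cot: reranker.score(query, cot))
    # 3) supervised target: reproduce the best CoT given *all* documents,
    #    forcing the model to ignore the irrelevant ones
    prompt = f"Query: {query}\nDocuments: {documents}\nSummarize with reasoning:"
    return {"input": prompt, "target": best}
```
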
6. arXiv:2502.17494 [pdf, other] - cs.IR, cs.AI, cs.LG - https://arxiv.org/abs/2502.17494
External Large Foundation Model: How to Efficiently Serve Trillions of Parameters for Online Ads Recommendation
Authors: Mingfu Liang, Xi Liu, Rong Jin, Boyang Liu, Qiuling Suo, Qinghai Zhou, Song Zhou, Laming Chen, Hua Zheng, Zhiyuan Li, Shali Jiang, Jiyan Yang, Xiaozhen Xia, Fan Yang, Yasmine Badr, Ellie Wen, Shuyu Xu, Hansey Chen, Zhengyu Zhang, Jade Nie, Chunzhi Yang, Zhichen Zeng, Weilin Zhang, Xingliang Huang, Qianru Li, et al. (74 additional authors not shown)
Abstract: Ads recommendation is a prominent service of online advertising systems and has been actively studied. Recent studies indicate that scaling up and advanced design of the recommendation model can bring significant performance improvements. However, at larger model scales, such prior studies have a significantly increasing gap from industrial practice, as they often neglect two fundamental challenges in industrial-scale applications. First, training and inference budgets are restricted for the model to be served; exceeding them may incur latency and impair the user experience. Second, large-volume data arrive in a streaming mode with data distributions dynamically shifting, as new users/ads join and existing users/ads leave the system. We propose the External Large Foundation Model (ExFM) framework to address these overlooked challenges. Specifically, we develop external distillation and a data augmentation system (DAS) to control the computational cost of training/inference while maintaining high performance. We design the teacher as a foundation model (FM) that can serve multiple students as vertical models (VMs), amortizing its building cost. We propose an Auxiliary Head and a Student Adapter to mitigate the data distribution gap between the FM and VMs caused by streaming data. Comprehensive experiments on internal industrial-scale applications and public datasets demonstrate significant performance gains from ExFM.
Submitted 26 February, 2025; v1 submitted 20 February, 2025; originally announced February 2025.
Comments: Accepted by the ACM Web Conference (WWW) 2025 Industrial Track as Oral Presentation

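A toy sketch of external distillation with an auxiliary head, one ingredient of ExFM (shapes and names invented here; the production system is far larger): the frozen teacher FM's prediction supervises the small vertical model through a separate head, so the serving head is not directly distorted by the teacher signal.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class VerticalModel(nn.Module):
    def __init__(self, d_in, d_hidden=64):
        super().__init__()
        self.backbone = nn.Sequential(nn.Linear(d_in, d_hidden), nn.ReLU())
        self.serving_head = nn.Linear(d_hidden, 1)   # served online
        self.aux_head = nn.Linear(d_hidden, 1)       # absorbs the teacher signal

    def forward(self, x):
        h = self.backbone(x)
        return self.serving_head(h), self.aux_head(h)

def distill_step(vm, teacher_logit, x, label):
    """label: float tensor of clicks; teacher_logit: frozen FM prediction."""
    serve_logit, aux_logit = vm(x)
    loss = F.binary_cross_entropy_with_logits(serve_logit.squeeze(-1), label)
    loss = loss + F.binary_cross_entropy_with_logits(
        aux_logit.squeeze(-1), torch.sigmoid(teacher_logit))  # soft target
    return loss
```
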
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by the ACM Web Conference (WWW) 2025 Industrial Track as Oral Presentation</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.16852">arXiv:2502.16852</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.16852">pdf</a>, <a href="https://arxiv.org/format/2502.16852">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Improving LLM General Preference Alignment via Optimistic Online Mirror Descent </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yuheng Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+D">Dian Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Ge%2C+T">Tao Ge</a>, <a href="/search/cs?searchtype=author&amp;query=Song%2C+L">Linfeng Song</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhichen Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Mi%2C+H">Haitao Mi</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+N">Nan Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+D">Dong Yu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.16852v1-abstract-short" style="display: inline;"> Reinforcement learning from human feedback (RLHF) has demonstrated remarkable effectiveness in aligning large language models (LLMs) with human preferences. Many existing alignment approaches rely on the Bradley-Terry (BT) model assumption, which assumes the existence of a ground-truth reward for each prompt-response pair. However, this assumption can be overly restrictive when modeling complex hu&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.16852v1-abstract-full').style.display = 'inline'; document.getElementById('2502.16852v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.16852v1-abstract-full" style="display: none;"> Reinforcement learning from human feedback (RLHF) has demonstrated remarkable effectiveness in aligning large language models (LLMs) with human preferences. Many existing alignment approaches rely on the Bradley-Terry (BT) model assumption, which assumes the existence of a ground-truth reward for each prompt-response pair. However, this assumption can be overly restrictive when modeling complex human preferences. In this paper, we drop the BT model assumption and study LLM alignment under general preferences, formulated as a two-player game. Drawing on theoretical insights from learning in games, we integrate optimistic online mirror descent into our alignment framework to approximate the Nash policy. Theoretically, we demonstrate that our approach achieves an $O(T^{-1})$ bound on the duality gap, improving upon the previous $O(T^{-1/2})$ result. 
Abstract: Reinforcement learning from human feedback (RLHF) has demonstrated remarkable effectiveness in aligning large language models (LLMs) with human preferences. Many existing alignment approaches rely on the Bradley-Terry (BT) model assumption, which assumes the existence of a ground-truth reward for each prompt-response pair. However, this assumption can be overly restrictive when modeling complex human preferences. In this paper, we drop the BT model assumption and study LLM alignment under general preferences, formulated as a two-player game. Drawing on theoretical insights from learning in games, we integrate optimistic online mirror descent into our alignment framework to approximate the Nash policy. Theoretically, we demonstrate that our approach achieves an $O(T^{-1})$ bound on the duality gap, improving upon the previous $O(T^{-1/2})$ result. More importantly, we implement our method and show through experiments that it outperforms state-of-the-art RLHF algorithms across multiple representative benchmarks.
Submitted 24 February, 2025; originally announced February 2025.

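For intuition, optimistic online mirror descent with an entropic mirror map (optimistic multiplicative weights) can be demonstrated on a toy zero-sum game; the payoff matrix below is illustrative and unrelated to the paper's experiments:

```python
import numpy as np

# Row player maximizes x^T A y; column player minimizes it (zero-sum).
A = np.array([[ 0.0,  1.0, -1.0],
              [-1.0,  0.0,  1.0],
              [ 1.0, -1.0,  0.0]])   # rock-paper-scissors payoffs
eta = 0.1
x = np.ones(3) / 3                   # row policy on the simplex
y = np.ones(3) / 3                   # column policy
g_prev, h_prev = A @ y, A.T @ x      # last-round gradients (the "optimism")

for _ in range(2000):
    g, h = A @ y, A.T @ x
    # optimistic step: current gradient plus its last change as a prediction
    x = x * np.exp(eta * (2 * g - g_prev)); x /= x.sum()
    y = y * np.exp(-eta * (2 * h - h_prev)); y /= y.sum()
    g_prev, h_prev = g, h

print(np.round(x, 3), np.round(y, 3))  # both approach the uniform Nash policy
```

The optimistic correction is what improves the plain mirror-descent rate; without it, the iterates of this game cycle instead of converging.
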
8. arXiv:2502.14922 [pdf, other] - cs.CL, cs.AI - https://arxiv.org/abs/2502.14922
SIFT: Grounding LLM Reasoning in Contexts via Stickers
Authors: Zihao Zeng, Xuyao Huang, Boxiu Li, Zhijie Deng
Abstract: This paper identifies that misinterpretation of the context can be a significant issue during the reasoning process of large language models, spanning from smaller models like Llama3.2-3B-Instruct to cutting-edge ones like DeepSeek-R1. For example, in the phrase "10 dollars per kilo," LLMs might not recognize that "per" means "for each," leading to calculation errors. We introduce a novel, post-training approach called **Stick to the Facts (SIFT)** to tackle this. SIFT leverages increasing inference-time compute to ground LLM reasoning in contexts. At the core of SIFT lies the *Sticker*, which is generated by the model itself to explicitly emphasize the key information within the context. Given the curated Sticker, SIFT generates two predictions -- one from the original query and one from the query augmented with the Sticker. If they differ, the Sticker is sequentially refined via *forward* optimization (to better align the extracted facts with the query) and *inverse* generation (to conform with the model's inherent tendencies) for more faithful reasoning outcomes. Studies across diverse models (from 3B to 100B+) and benchmarks (e.g., GSM8K, MATH-500) reveal consistent performance improvements. Notably, SIFT improves the pass@1 accuracy of DeepSeek-R1 on AIME2024 from 78.33% to **85.67%**, establishing a new state-of-the-art in the open-source community. The code is available at https://github.com/zhijie-group/SIFT.
Submitted 19 February, 2025; originally announced February 2025.

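A hedged sketch of the inference loop the abstract outlines (llm.generate is a hypothetical helper; the linked repo is authoritative): generate a Sticker, compare the Sticker-augmented prediction with the plain one, and refine while they disagree.

```python
def sift_answer(llm, query, max_rounds=3):
    sticker = llm.generate(f"Extract the key facts of: {query}")
    pred_stick = None
    for _ in range(max_rounds):
        pred_plain = llm.generate(query)
        pred_stick = llm.generate(f"{query}\nKey facts: {sticker}")
        if pred_plain == pred_stick:        # consensus -> accept the answer
            return pred_stick
        # forward optimization: realign the extracted facts with the query
        # (inverse generation, omitted here, would regenerate the Sticker in
        # the model's own preferred phrasing)
        sticker = llm.generate(f"Revise these facts to match the question.\n"
                               f"Question: {query}\nFacts: {sticker}")
    return pred_stick
```
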
9. arXiv:2502.12562 [pdf, other] - cs.CL, cs.CR, cs.MM - https://arxiv.org/abs/2502.12562
SEA: Low-Resource Safety Alignment for Multimodal Large Language Models via Synthetic Embeddings
Authors: Weikai Lu, Hao Peng, Huiping Zhuang, Cen Chen, Ziqian Zeng
Abstract: Multimodal Large Language Models (MLLMs) have serious security vulnerabilities. While safety alignment using multimodal datasets consisting of text and data of additional modalities can effectively enhance an MLLM's security, it is costly to construct these datasets. Existing low-resource security alignment methods, including textual alignment, have been found to struggle with the security risks posed by additional modalities. To address this, we propose Synthetic Embedding augmented safety Alignment (SEA), which optimizes embeddings of additional modalities through gradient updates to expand textual datasets. This enables multimodal safety alignment training even when only textual data is available. Extensive experiments on image-, video-, and audio-based MLLMs demonstrate that SEA can synthesize a high-quality embedding on a single RTX3090 GPU within 24 seconds. SEA significantly improves the security of MLLMs when faced with threats from additional modalities. To assess the security risks introduced by video and audio, we also introduce a new benchmark called VA-SafetyBench. High attack success rates across multiple MLLMs validate its difficulty. Our code and data will be available at https://github.com/ZeroNLP/SEA.
Submitted 18 February, 2025; originally announced February 2025.

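An illustrative sketch of synthesizing a modality embedding by gradient updates, as the abstract describes (mllm.loss and the tensor shapes are hypothetical; not the authors' interface):

```python
import torch

def synthesize_embedding(mllm, text_ids, target_ids, steps=200, lr=0.1,
                         n_tokens=32, d_model=4096):
    """Optimize a free embedding so the frozen MLLM maps it, together with
    the text, to a desired safe target response."""
    emb = torch.randn(1, n_tokens, d_model, requires_grad=True)
    opt = torch.optim.Adam([emb], lr=lr)
    for _ in range(steps):
        # the frozen MLLM consumes the synthetic "image/audio" embedding in
        # place of real modality features, scored on the safe target response
        loss = mllm.loss(modality_embeds=emb, text_ids=text_ids,
                         labels=target_ids)
        opt.zero_grad()
        loss.backward()
        opt.step()
    return emb.detach()   # added to the textual dataset as a synthetic sample
```
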
10. arXiv:2502.12226 [pdf, other] - cs.LG, cs.AI - https://arxiv.org/abs/2502.12226
On Creating a Causally Grounded Usable Rating Method for Assessing the Robustness of Foundation Models Supporting Time Series
Authors: Kausik Lakkaraju, Rachneet Kaur, Parisa Zehtabi, Sunandita Patra, Siva Likitha Valluru, Zhen Zeng, Biplav Srivastava, Marco Valtorta
Abstract: Foundation Models (FMs) have improved time series forecasting in various sectors, such as finance, but their vulnerability to input disturbances can hinder their adoption by stakeholders, such as investors and analysts. To address this, we propose a causally grounded rating framework to study the robustness of Foundational Models for Time Series (FMTS) with respect to input perturbations. We evaluate our approach on the stock price prediction problem, a well-studied problem with easily accessible public data, evaluating six state-of-the-art (some multi-modal) FMTS across six prominent stocks spanning three industries. The ratings proposed by our framework effectively assess the robustness of FMTS and also offer actionable insights for model selection and deployment. Within the scope of our study, we find that (1) multi-modal FMTS exhibit better robustness and accuracy compared to their uni-modal versions, and (2) FMTS pre-trained on the time series forecasting task exhibit better robustness and forecasting accuracy compared to general-purpose FMTS pre-trained across diverse settings. Further, to validate our framework's usability, we conduct a user study showcasing FMTS prediction errors along with our computed ratings. The study confirmed that our ratings reduced the difficulty for users in comparing the robustness of different systems.
Submitted 17 February, 2025; originally announced February 2025.

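One crude way to operationalize a perturbation-based robustness rating (an invented metric, much simpler than the paper's causally grounded framework; model.forecast is a placeholder interface):

```python
import numpy as np

def robustness_rating(model, series, horizon=5, noise=0.01, trials=20):
    """Compare forecasts before/after small input perturbations and map the
    average degradation to a coarse 1-5 rating (higher = more robust)."""
    base = model.forecast(series, horizon)
    degradations = []
    for _ in range(trials):
        perturbed = series * (1 + noise * np.random.randn(len(series)))
        pred = model.forecast(perturbed, horizon)
        rel_err = np.mean(np.abs(pred - base)) / (np.mean(np.abs(base)) + 1e-9)
        degradations.append(rel_err)
    score = float(np.mean(degradations))   # lower = more robust
    return max(1, 5 - int(score * 100))    # crude star rating
```
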
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12216">arXiv:2502.12216</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.12216">pdf</a>, <a href="https://arxiv.org/format/2502.12216">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Tactic: Adaptive Sparse Attention with Clustering and Distribution Fitting for Long-Context LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+K">Kan Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+T">Tian Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Q">Qinyu Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+Y">Yile Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhichen Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Kadekodi%2C+R">Rohan Kadekodi</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+L">Liangyu Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+A">Ang Li</a>, <a href="/search/cs?searchtype=author&amp;query=Krishnamurthy%2C+A">Arvind Krishnamurthy</a>, <a href="/search/cs?searchtype=author&amp;query=Kasikci%2C+B">Baris Kasikci</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Long-context models are essential for many applications but face inefficiencies in loading large KV caches during decoding. Prior methods enforce fixed token budgets for sparse attention, assuming a set number of tokens can approximate full attention. However, these methods overlook variations in the importance of attention across heads, layers, and contexts. To address these limitations, we propose Tactic, a sparsity-adaptive and calibration-free sparse attention mechanism that dynamically selects tokens based on their cumulative attention scores rather than a fixed token budget. By setting a target fraction of total attention scores, Tactic ensures that token selection naturally adapts to variations in attention sparsity. To efficiently approximate this selection, Tactic leverages clustering-based sorting and distribution fitting, allowing it to accurately estimate token importance with minimal computational overhead. We show that Tactic outperforms existing sparse attention algorithms, achieving superior accuracy and up to 7.29x decode attention speedup. This improvement translates to an overall 1.58x end-to-end inference speedup, making Tactic a practical and effective solution for long-context LLM inference in accuracy-sensitive applications. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p>
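<p class="is-size-7">The selection rule stated in the abstract, keeping the highest-scoring tokens until they cover a target fraction of the total attention mass, can be sketched in a few lines (this toy uses exact scores; Tactic itself estimates them cheaply via clustering-based sorting and distribution fitting):</p> <pre><code># Minimal sketch of cumulative-attention token selection driven by a
# target fraction instead of a fixed token budget.
import numpy as np

def select_tokens(attn_scores, target_frac=0.95):
    order = np.argsort(attn_scores)[::-1]        # tokens by descending score
    cum = np.cumsum(attn_scores[order])
    # smallest prefix whose cumulative score reaches the target fraction
    k = int(np.searchsorted(cum, target_frac * attn_scores.sum())) + 1
    return np.sort(order[:k])                    # KV indices to keep

scores = np.array([0.50, 0.02, 0.30, 0.01, 0.12, 0.05])
print(select_tokens(scores, 0.9))   # -> [0 2 4]; adapts to score concentration
</code></pre> <p class="is-size-7">The token budget is then an emergent quantity: heads with concentrated attention keep few tokens, diffuse heads keep many.</p> </li>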
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12215">arXiv:2502.12215</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.12215">pdf</a>, <a href="https://arxiv.org/format/2502.12215">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Revisiting the Test-Time Scaling of o1-like Models: Do they Truly Possess Test-Time Scaling Capabilities? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhiyuan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+Q">Qinyuan Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+Z">Zhangyue Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yunhua Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+X">Xipeng Qiu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: The advent of test-time scaling in large language models (LLMs), exemplified by OpenAI&#39;s o1 series, has advanced reasoning capabilities by scaling computational resource allocation during inference. While successors like QwQ, Deepseek-R1 (R1) and LIMO replicate these advancements, whether these models truly possess test-time scaling capabilities remains underexplored. This study found that longer CoTs of these o1-like models do not consistently enhance accuracy; in fact, correct solutions are often shorter than incorrect ones for the same questions. Further investigation shows this phenomenon is closely related to models&#39; self-revision capabilities - longer CoTs contain more self-revisions, which often lead to performance degradation. We then compare sequential and parallel scaling strategies on QwQ, R1 and LIMO, finding that parallel scaling achieves better coverage and scalability. Based on these insights, we propose Shortest Majority Vote, a method that combines parallel scaling strategies with CoT length characteristics, significantly improving models&#39; test-time scalability compared to conventional majority voting approaches. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p>
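<p class="is-size-7">A speculative reading of Shortest Majority Vote, since the abstract only says parallel votes are combined with CoT-length characteristics: weight each candidate answer by its vote count and penalize long average chains. The exact weighting below is a guess, not the paper&#39;s formula:</p> <pre><code># Illustrative guess: a majority vote that favors answer clusters reached
# with shorter chains of thought, per the observation that correct
# solutions are often the shorter ones.
from collections import defaultdict

def shortest_majority_vote(samples):
    """samples: list of (answer, cot_length) pairs from parallel decoding."""
    groups = defaultdict(list)
    for answer, cot_len in samples:
        groups[answer].append(cot_len)
    # score = vote count divided by the cluster's mean CoT length
    return max(groups, key=lambda a: len(groups[a]) * len(groups[a]) / sum(groups[a]))

votes = [("42", 120), ("42", 150), ("41", 900), ("42", 130), ("41", 880)]
print(shortest_majority_vote(votes))   # -> "42"
</code></pre> </li>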
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.12134">arXiv:2502.12134</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.12134">pdf</a>, <a href="https://arxiv.org/format/2502.12134">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SoftCoT: Soft Chain-of-Thought for Efficient Reasoning with LLMs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Y">Yige Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+X">Xu Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhiwei Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Miao%2C+C">Chunyan Miao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Chain-of-Thought (CoT) reasoning enables Large Language Models (LLMs) to solve complex reasoning tasks by generating intermediate reasoning steps. However, most existing approaches focus on hard token decoding, which constrains reasoning within the discrete vocabulary space and may not always be optimal. While recent efforts explore continuous-space reasoning, they often suffer from catastrophic forgetting, limiting their applicability to state-of-the-art LLMs that already perform well in zero-shot settings with a proper instruction. To address this challenge, we propose a novel approach for continuous-space reasoning that does not require modifying the underlying LLM. Specifically, we employ a lightweight assistant model to generate instance-specific soft thought tokens speculatively as the initial chain of thoughts, which are then mapped into the LLM&#39;s representation space via a projection module. Experimental results on five reasoning benchmarks demonstrate that our method enhances LLM reasoning performance through supervised, parameter-efficient fine-tuning. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p>
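<p class="is-size-7">The plumbing described above, a lightweight assistant emitting soft thought tokens that a projection module maps into the frozen LLM&#39;s representation space, might look roughly like this in PyTorch (all dimensions are assumptions, not taken from the paper):</p> <pre><code># Sketch: project assistant-model "soft thoughts" into the LLM embedding
# space and prepend them to the prompt embeddings. Only the projection
# (and optionally the assistant) would be trained; the LLM stays frozen.
import torch
import torch.nn as nn

assistant_dim, llm_dim, n_thoughts = 512, 4096, 8
projection = nn.Linear(assistant_dim, llm_dim)       # trainable module

def build_inputs(soft_thoughts, prompt_embeds):
    """soft_thoughts: (n_thoughts, assistant_dim); prompt_embeds: (seq, llm_dim)."""
    projected = projection(soft_thoughts)            # -> (n_thoughts, llm_dim)
    return torch.cat([projected, prompt_embeds], dim=0)  # soft CoT prefix + prompt

inputs = build_inputs(torch.randn(n_thoughts, assistant_dim),
                      torch.randn(16, llm_dim))
print(inputs.shape)   # torch.Size([24, 4096])
</code></pre> </li>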
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.11370">arXiv:2502.11370</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.11370">pdf</a>, <a href="https://arxiv.org/format/2502.11370">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> HI-GVF: Shared Control based on Human-Influenced Guiding Vector Fields for Human-multi-robot Cooperation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+P">Pengming Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Z">Zongtan Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+W">Weijia Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Dai%2C+W">Wei Dai</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhiwen Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+H">Huimin Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Human-multi-robot shared control leverages human decision-making and robotic autonomy to enhance human-robot collaboration. While widely studied, existing systems often adopt a leader-follower model, limiting robot autonomy to some extent. In addition, a human is required to directly participate in the motion control of robots through teleoperation, which significantly burdens the operator. To alleviate these two issues, we propose a layered shared control computing framework using human-influenced guiding vector fields (HI-GVF) for human-robot collaboration. HI-GVF guides the multi-robot system along a desired path specified by the human. Then, an intention field is designed to merge the human and robot intentions, accelerating the propagation of the human intention within the multi-robot system. Moreover, we provide a stability analysis of the proposed model and use collision avoidance based on safety barrier certificates to fine-tune the velocity. Finally, taking the firefighting task as an example scenario, we conduct simulations and experiments using multiple human-robot interfaces (brain-computer interface, myoelectric wristband, eye-tracking), and the results demonstrate that our proposed approach boosts the effectiveness and performance of the task. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p>
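<p class="is-size-7">For readers new to guiding vector fields, the building block that HI-GVF extends, a toy GVF for a circular path blends a tangent term (move along the path) with a convergence term (shrink the path error); the human-influence and intention-field layers of the paper are not modeled here, and the gains are arbitrary:</p> <pre><code># Toy 2-D guiding vector field for the unit circle phi(x, y) = 0.
import numpy as np

def gvf(pos, radius=1.0, k=1.0):
    x, y = pos
    phi = x**2 + y**2 - radius**2            # signed path error
    grad = np.array([2 * x, 2 * y])          # gradient of phi
    tangent = np.array([-grad[1], grad[0]])  # gradient rotated 90 degrees
    v = tangent - k * phi * grad             # follow path while converging to it
    return v / (np.linalg.norm(v) + 1e-9)    # unit guidance command

pos = np.array([1.5, 0.0])                   # robot starts off the path
for _ in range(50):
    pos = pos + 0.05 * gvf(pos)
print(np.linalg.norm(pos))                   # approaches 1.0 (on the circle)
</code></pre> </li>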
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.09768">arXiv:2502.09768</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.09768">pdf</a>, <a href="https://arxiv.org/format/2502.09768">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Social and Information Networks">cs.SI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Physics and Society">physics.soc-ph</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSMC.2025.3525465">10.1109/TSMC.2025.3525465 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Complex Network Modelling with Power-law Activating Patterns and Its Evolutionary Dynamics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Ziyan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+M">Minyu Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+P">Pengfei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Kurths%2C+J">Jurgen Kurths</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.09768v1-abstract-short" style="display: inline;"> Complex network theory provides a unifying framework for the study of structured dynamic systems. The current literature emphasizes a widely reported phenomenon of intermittent interaction among network vertices. In this paper, we introduce a complex network model that considers the stochastic switching of individuals between activated and quiescent states at power-law rates and the corresponding&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09768v1-abstract-full').style.display = 'inline'; document.getElementById('2502.09768v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.09768v1-abstract-full" style="display: none;"> Complex network theory provides a unifying framework for the study of structured dynamic systems. The current literature emphasizes a widely reported phenomenon of intermittent interaction among network vertices. In this paper, we introduce a complex network model that considers the stochastic switching of individuals between activated and quiescent states at power-law rates and the corresponding evolutionary dynamics. By using the Markov chain and renewal theory, we discover a homogeneous stationary distribution of activated sizes in the network with power-law activating patterns and infer some statistical characteristics. To better understand the effect of power-law activating patterns, we study the two-person-two-strategy evolutionary game dynamics, demonstrate the absorbability of strategies, and obtain the critical cooperation conditions for prisoner&#39;s dilemmas in homogeneous networks without mutation. The evolutionary dynamics in real networks are also discussed. Our results provide a new perspective to analyze and understand social physics in time-evolving network systems. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.09768v1-abstract-full').style.display = 'none'; document.getElementById('2502.09768v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 9 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Systems, Man, and Cybernetics: Systems (2025) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08309">arXiv:2502.08309</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08309">pdf</a>, <a href="https://arxiv.org/format/2502.08309">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Unlocking Scaling Law in Industrial Recommendation Systems with a Three-step Paradigm based Large User Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yan%2C+B">Bencheng Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Shilei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhiyuan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zihao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yizhen Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+Y">Yujin Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+L">Langming Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiaqi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Di Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+W">Wenbo Su</a>, <a href="/search/cs?searchtype=author&amp;query=Pengjie%2C+W">Wang Pengjie</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jian Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+B">Bo Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.08309v1-abstract-short" style="display: inline;"> Recent advancements in autoregressive Large Language Models (LLMs) have achieved significant milestones, largely attributed to their scalability, often referred to as the &#34;scaling law&#34;. Inspired by these achievements, there has been a growing interest in adapting LLMs for Recommendation Systems (RecSys) by reformulating RecSys tasks into generative problems. 
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.08309">arXiv:2502.08309</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.08309">pdf</a>, <a href="https://arxiv.org/format/2502.08309">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Unlocking Scaling Law in Industrial Recommendation Systems with a Three-step Paradigm based Large User Model </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yan%2C+B">Bencheng Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Shilei Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhiyuan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zihao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yizhen Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+Y">Yujin Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+L">Langming Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jiaqi Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Di Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Su%2C+W">Wenbo Su</a>, <a href="/search/cs?searchtype=author&amp;query=Pengjie%2C+W">Wang Pengjie</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+J">Jian Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+B">Bo Zheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Recent advancements in autoregressive Large Language Models (LLMs) have achieved significant milestones, largely attributed to their scalability, often referred to as the &#34;scaling law&#34;. Inspired by these achievements, there has been a growing interest in adapting LLMs for Recommendation Systems (RecSys) by reformulating RecSys tasks into generative problems. However, these End-to-End Generative Recommendation (E2E-GR) methods tend to prioritize idealized goals, often at the expense of the practical advantages offered by traditional Deep Learning based Recommendation Models (DLRMs) in terms of features, architecture, and practices. This disparity between idealized goals and practical needs introduces several challenges and limitations, leaving the scaling law locked in industrial RecSys. In this paper, we introduce a large user model (LUM) that addresses these limitations through a three-step paradigm, designed to meet the stringent requirements of industrial settings while unlocking the potential for scalable recommendations. Our extensive experimental evaluations demonstrate that LUM outperforms both state-of-the-art DLRMs and E2E-GR approaches. Notably, LUM exhibits excellent scalability, with performance improvements observed as the model scales up to 7 billion parameters. Additionally, we have successfully deployed LUM in an industrial application, where it achieved significant gains in an A/B test, further validating its effectiveness and practicality. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li>
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.05416">arXiv:2502.05416</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.05416">pdf</a>, <a href="https://arxiv.org/format/2502.05416">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Deep Generative Models with Hard Linear Equality Constraints </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+R">Ruoyan Li</a>, <a href="/search/cs?searchtype=author&amp;query=Sahu%2C+D+R">Dipti Ranjan Sahu</a>, <a href="/search/cs?searchtype=author&amp;query=Broeck%2C+G+V+d">Guy Van den Broeck</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhe Zeng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.05416v2-abstract-short" style="display: inline;"> While deep generative models~(DGMs) have demonstrated remarkable success in capturing complex data distributions, they consistently fail to learn constraints that encode domain knowledge and thus require constraint integration. Existing solutions to this challenge have primarily relied on heuristic methods and often ignore the underlying data distribution, harming the generative performance. In th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05416v2-abstract-full').style.display = 'inline'; document.getElementById('2502.05416v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.05416v2-abstract-full" style="display: none;"> While deep generative models~(DGMs) have demonstrated remarkable success in capturing complex data distributions, they consistently fail to learn constraints that encode domain knowledge and thus require constraint integration. Existing solutions to this challenge have primarily relied on heuristic methods and often ignore the underlying data distribution, harming the generative performance. In this work, we propose a probabilistically sound approach for enforcing the hard constraints into DGMs to generate constraint-compliant and realistic data. This is achieved by our proposed gradient estimators that allow the constrained distribution, the data distribution conditioned on constraints, to be differentiably learned. We carry out extensive experiments with various DGM model architectures over five image datasets and three scientific applications in which domain knowledge is governed by linear equality constraints. We validate that the standard DGMs almost surely generate data violating the constraints. Among all the constraint integration strategies, ours not only guarantees the satisfaction of constraints in generation but also archives superior generative performance than the other methods across every benchmark. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.05416v2-abstract-full').style.display = 'none'; document.getElementById('2502.05416v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04960">arXiv:2502.04960</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04960">pdf</a>, <a href="https://arxiv.org/format/2502.04960">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Commonality and Individuality! Integrating Humor Commonality with Speaker Individuality for Humor Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+H">Haohao Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+J">Junyu Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zeyuan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+Z">Zewen Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiaokun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+L">Liang Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+H">Hongfei Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.04960v1-abstract-short" style="display: inline;"> Humor recognition aims to identify whether a specific speaker&#39;s text is humorous. Current methods for humor recognition mainly suffer from two limitations: (1) they solely focus on one aspect of humor commonalities, ignoring the multifaceted nature of humor; and (2) they typically overlook the critical role of speaker individuality, which is essential for a comprehensive understanding of humor exp&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.04960v1-abstract-full').style.display = 'inline'; document.getElementById('2502.04960v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.04960v1-abstract-full" style="display: none;"> Humor recognition aims to identify whether a specific speaker&#39;s text is humorous. Current methods for humor recognition mainly suffer from two limitations: (1) they solely focus on one aspect of humor commonalities, ignoring the multifaceted nature of humor; and (2) they typically overlook the critical role of speaker individuality, which is essential for a comprehensive understanding of humor expressions. To bridge these gaps, we introduce the Commonality and Individuality Incorporated Network for Humor Recognition (CIHR), a novel model designed to enhance humor recognition by integrating multifaceted humor commonalities with the distinctive individuality of speakers. 
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.04960">arXiv:2502.04960</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.04960">pdf</a>, <a href="https://arxiv.org/format/2502.04960">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Commonality and Individuality! Integrating Humor Commonality with Speaker Individuality for Humor Recognition </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+H">Haohao Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+J">Junyu Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zeyuan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+Z">Zewen Bai</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiaokun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yang%2C+L">Liang Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Lin%2C+H">Hongfei Lin</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Humor recognition aims to identify whether a specific speaker&#39;s text is humorous. Current methods for humor recognition mainly suffer from two limitations: (1) they solely focus on one aspect of humor commonalities, ignoring the multifaceted nature of humor; and (2) they typically overlook the critical role of speaker individuality, which is essential for a comprehensive understanding of humor expressions. To bridge these gaps, we introduce the Commonality and Individuality Incorporated Network for Humor Recognition (CIHR), a novel model designed to enhance humor recognition by integrating multifaceted humor commonalities with the distinctive individuality of speakers. The CIHR features a Humor Commonality Analysis module that explores various perspectives of multifaceted humor commonality within user texts, and a Speaker Individuality Extraction module that captures both static and dynamic aspects of a speaker&#39;s profile to accurately model their distinctive individuality. Additionally, Static and Dynamic Fusion modules are introduced to effectively incorporate the humor commonality with the speaker&#39;s individuality in the humor recognition process. Extensive experiments demonstrate the effectiveness of CIHR, underscoring the importance of concurrently addressing both multifaceted humor commonality and distinctive speaker individuality in humor recognition. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by NAACL 2025</span> </p>
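<p class="is-size-7">A schematic sketch of the fusion step, combining a humor-commonality vector with static and dynamic speaker-individuality vectors before classification; the dimensions and the gated form below are assumptions, and the paper&#39;s analysis and fusion modules are richer:</p> <pre><code># Toy gated fusion of commonality and individuality representations.
import torch
import torch.nn as nn

class HumorFusion(nn.Module):
    def __init__(self, dim=256):
        super().__init__()
        self.gate = nn.Sequential(nn.Linear(3 * dim, dim), nn.Sigmoid())
        self.classifier = nn.Linear(dim, 2)      # humorous vs. not humorous

    def forward(self, commonality, static_ind, dynamic_ind):
        joint = torch.cat([commonality, static_ind, dynamic_ind], dim=-1)
        g = self.gate(joint)                     # how much commonality to keep
        fused = g * commonality + (1 - g) * (static_ind + dynamic_ind) / 2
        return self.classifier(fused)

model = HumorFusion()
logits = model(torch.randn(4, 256), torch.randn(4, 256), torch.randn(4, 256))
print(logits.shape)   # torch.Size([4, 2])
</code></pre> </li>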
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.02792">arXiv:2502.02792</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2502.02792">pdf</a>, <a href="https://arxiv.org/format/2502.02792">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Where Do Passengers Gaze? Impact of Passengers&#39; Personality Traits on Their Gaze Pattern Toward Pedestrians During APMV-Pedestrian Interactions with Diverse eHMIs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+H">Hailong Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhe Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Wada%2C+T">Takahiro Wada</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Autonomous Personal Mobility Vehicles (APMVs) are designed to address the &ldquo;last-mile&rdquo; transportation challenge for everyone. When an APMV encounters a pedestrian, it uses an external Human-Machine Interface (eHMI) to negotiate road rights. Through this interaction, passengers also engage with the process. This study examines passengers&#39; gaze behavior toward pedestrians during such interactions, focusing on whether different eHMI designs influence gaze patterns based on passengers&#39; personality traits. The results indicated that when using a visual-based eHMI, passengers often struggled to perceive the communication content. Consequently, passengers with higher Neuroticism scores, who were more sensitive to communication details, might seek cues from pedestrians&#39; reactions. In addition, a multimodal eHMI (visual and voice) using a neutral voice did not significantly affect the gaze behavior of passengers toward pedestrians, regardless of personality traits. In contrast, a multimodal eHMI using an affective voice encouraged passengers with high Openness to Experience scores to focus on pedestrians&#39; heads. In summary, this study revealed how different eHMI designs influence passengers&#39; gaze behavior and highlighted the effects of personality traits on their gaze patterns toward pedestrians, providing new insights for personalized eHMI designs. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17974">arXiv:2501.17974</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.17974">pdf</a>, <a href="https://arxiv.org/format/2501.17974">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Think Smarter not Harder: Adaptive Reasoning with Inference Aware Optimization </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Z">Zishun Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+T">Tengyu Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Jin%2C+D">Di Jin</a>, <a href="/search/cs?searchtype=author&amp;query=Sankararaman%2C+K+A">Karthik Abinav Sankararaman</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+Y">Yun He</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+W">Wenxuan Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhouhao Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Helenowski%2C+E">Eryk Helenowski</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+C">Chen Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+S">Sinong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+H">Hao Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Fang%2C+H">Han Fang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Solving mathematics problems has been an intriguing capability of large language models, and many efforts have been made to improve reasoning by extending reasoning length, such as through self-correction and extensive long chain-of-thoughts. While promising in problem-solving, advanced long reasoning chain models exhibit an undesired single-modal behavior, where trivial questions require unnecessarily tedious long chains of thought. In this work, we propose a way to allow models to be aware of inference budgets by formulating it as utility maximization with respect to an inference budget constraint, hence naming our algorithm Inference Budget-Constrained Policy Optimization (IBPO). In a nutshell, models fine-tuned through IBPO learn to &ldquo;understand&rdquo; the difficulty of queries and allocate inference budgets to harder ones. With different inference budgets, our best models achieve $4.14\%$ and $5.74\%$ absolute improvements ($8.08\%$ and $11.2\%$ relative improvements) on MATH500 using $2.16$x and $4.32$x inference budgets respectively, relative to LLaMA3.1 8B Instruct. These improvements are approximately $2$x those of self-consistency under the same budgets. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
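<p class="is-size-7">One natural reading of the constrained objective described above, with $u$ a task utility, $c(y)$ the inference cost of a response (e.g., generated tokens), and $B$ the budget; the symbols are illustrative rather than the paper&#39;s notation:</p> <p class="mathjax">$$\max_{\pi}\ \mathbb{E}_{x\sim\mathcal{D},\, y\sim\pi(\cdot\mid x)}\big[u(x,y)\big]\qquad\text{s.t.}\qquad\mathbb{E}_{x\sim\mathcal{D},\, y\sim\pi(\cdot\mid x)}\big[c(y)\big]\le B,$$ so fine-tuning pushes the policy $\pi$ to spend long chains of thought only where the extra tokens buy expected utility.</p> </li>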
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17690">arXiv:2501.17690</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.17690">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Segmentation-Aware Generative Reinforcement Network (GRN) for Tissue Layer Segmentation in 3-D Ultrasound Images for Chronic Low-back Pain (cLBP) Assessment </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zixue Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+X">Xiaoyan Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Cartier%2C+M">Matthew Cartier</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+T">Tong Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jing Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Meng%2C+X">Xin Meng</a>, <a href="/search/cs?searchtype=author&amp;query=Sheng%2C+Z">Zhiyu Sheng</a>, <a href="/search/cs?searchtype=author&amp;query=Satarpour%2C+M">Maryam Satarpour</a>, <a href="/search/cs?searchtype=author&amp;query=Cormack%2C+J+M">John M Cormack</a>, <a href="/search/cs?searchtype=author&amp;query=Bean%2C+A">Allison Bean</a>, <a href="/search/cs?searchtype=author&amp;query=Nussbaum%2C+R">Ryan Nussbaum</a>, <a href="/search/cs?searchtype=author&amp;query=Maurer%2C+M">Maya Maurer</a>, <a href="/search/cs?searchtype=author&amp;query=Landis-Walkenhorst%2C+E">Emily Landis-Walkenhorst</a>, <a href="/search/cs?searchtype=author&amp;query=Kumbhare%2C+D">Dinesh Kumbhare</a>, <a href="/search/cs?searchtype=author&amp;query=Kim%2C+K">Kang Kim</a>, <a href="/search/cs?searchtype=author&amp;query=Wasan%2C+A">Ajay Wasan</a>, <a href="/search/cs?searchtype=author&amp;query=Pu%2C+J">Jiantao Pu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17690v1-abstract-short" style="display: inline;"> We introduce a novel segmentation-aware joint training framework called generative reinforcement network (GRN) that integrates segmentation loss feedback to optimize both image generation and segmentation performance in a single stage. An image enhancement technique called segmentation-guided enhancement (SGE) is also developed, where the generator produces images tailored specifically for the seg&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17690v1-abstract-full').style.display = 'inline'; document.getElementById('2501.17690v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.17690v1-abstract-full" style="display: none;"> We introduce a novel segmentation-aware joint training framework called generative reinforcement network (GRN) that integrates segmentation loss feedback to optimize both image generation and segmentation performance in a single stage. 
An image enhancement technique called segmentation-guided enhancement (SGE) is also developed, where the generator produces images tailored specifically for the segmentation model. Two variants of GRN were also developed, including GRN for sample-efficient learning (GRN-SEL) and GRN for semi-supervised learning (GRN-SSL). GRN&#39;s performance was evaluated using a dataset of 69 fully annotated 3D ultrasound scans from 29 subjects. The annotations included six anatomical structures: dermis, superficial fat, superficial fascial membrane (SFM), deep fat, deep fascial membrane (DFM), and muscle. Our results show that GRN-SEL with SGE reduces labeling efforts by up to 70% while achieving a 1.98% improvement in the Dice Similarity Coefficient (DSC) compared to models trained on fully labeled datasets. GRN-SEL alone reduces labeling efforts by 60%, GRN-SSL with SGE decreases labeling requirements by 70%, and GRN-SSL alone by 60%, all while maintaining performance comparable to fully supervised models. These findings suggest the effectiveness of the GRN framework in optimizing segmentation performance with significantly less labeled data, offering a scalable and efficient solution for ultrasound image analysis and reducing the burdens associated with data annotation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.17690v1-abstract-full').style.display = 'none'; document.getElementById('2501.17690v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17636">arXiv:2501.17636</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.17636">pdf</a>, <a href="https://arxiv.org/format/2501.17636">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Efficient Interactive 3D Multi-Object Removal </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ni%2C+J">Jingcheng Ni</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+W">Weiguang Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Daniel Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Ziyao Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=You%2C+C">Chenyu You</a>, <a href="/search/cs?searchtype=author&amp;query=Wong%2C+A">Alex Wong</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+K">Kaizhu Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.17636v2-abstract-short" style="display: inline;"> Object removal is of great significance to 3D scene understanding, essential for applications in content filtering and scene editing. Current mainstream methods primarily focus on removing individual objects, with a few methods dedicated to eliminating an entire area or all objects of a certain category. 
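<p class="is-size-7">The joint objective sketched in the abstract, a generation loss plus segmentation-loss feedback flowing into the generator, reduces to something like the following (toy PyTorch stand-ins for both networks; the real GRN architecture, data, and SGE training schedule are more involved):</p> <pre><code># Single joint step: the generator is optimized on its own loss plus the
# downstream segmentation loss, so gradients from the segmenter shape the
# generated (enhanced) images.
import torch
import torch.nn as nn

generator = nn.Conv2d(1, 1, 3, padding=1)        # stand-in for G
segmenter = nn.Conv2d(1, 6, 3, padding=1)        # 6 tissue-layer classes
opt = torch.optim.Adam(list(generator.parameters()) +
                       list(segmenter.parameters()), lr=1e-3)
gen_loss_fn, seg_loss_fn, lam = nn.MSELoss(), nn.CrossEntropyLoss(), 1.0

raw = torch.randn(2, 1, 64, 64)                  # toy ultrasound patches
target = torch.randn(2, 1, 64, 64)               # toy enhancement targets
mask = torch.randint(0, 6, (2, 64, 64))          # toy tissue annotations

enhanced = generator(raw)
loss = gen_loss_fn(enhanced, target) + lam * seg_loss_fn(segmenter(enhanced), mask)
loss.backward()                                  # segmentation feedback reaches G
opt.step()
print(float(loss))
</code></pre> </li>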
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.17636">arXiv:2501.17636</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.17636">pdf</a>, <a href="https://arxiv.org/format/2501.17636">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Efficient Interactive 3D Multi-Object Removal </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ni%2C+J">Jingcheng Ni</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+W">Weiguang Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+D">Daniel Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Ziyao Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=You%2C+C">Chenyu You</a>, <a href="/search/cs?searchtype=author&amp;query=Wong%2C+A">Alex Wong</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+K">Kaizhu Huang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Object removal is of great significance to 3D scene understanding, essential for applications in content filtering and scene editing. Current mainstream methods primarily focus on removing individual objects, with a few methods dedicated to eliminating an entire area or all objects of a certain category. However, they confront the challenge of insufficient granularity and flexibility for real-world applications, where users demand tailored excision and preservation of objects within defined zones. In addition, most of the current methods require various priors when addressing multi-view inpainting, which is time-consuming. To address these limitations, we propose an efficient and user-friendly pipeline for 3D multi-object removal, enabling users to flexibly select areas and define objects for removal or preservation. Concretely, to ensure object consistency and correspondence across multiple views, we propose a novel mask matching and refinement module, which integrates homography-based warping with high-confidence anchor points for segmentation. By leveraging the IoU joint shape context distance loss, we enhance the accuracy of warped masks and improve subsequent inpainting processes. Considering the current immaturity of 3D multi-object removal, we provide a new evaluation dataset to bridge the developmental void. Experimental results demonstrate that our method significantly reduces computational costs, achieving processing speeds more than 80% faster than state-of-the-art methods while maintaining equivalent or higher reconstruction quality. </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p>
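<p class="is-size-7">One named ingredient, homography-based warping of an object mask into another view, can be illustrated with OpenCV (toy correspondences below; the high-confidence anchor points and the IoU joint shape context distance loss from the paper are not shown):</p> <pre><code># Warp a binary object mask from a source view to a target view using a
# homography estimated from matched points, then measure mask overlap.
import cv2
import numpy as np

mask = np.zeros((240, 320), dtype=np.uint8)
mask[80:160, 100:200] = 255                     # object mask in the source view

src = np.float32([[0, 0], [320, 0], [320, 240], [0, 240]])   # toy matches
dst = np.float32([[10, 5], [310, 15], [300, 235], [5, 225]])
H, _ = cv2.findHomography(src, dst)

warped = cv2.warpPerspective(mask, H, (320, 240), flags=cv2.INTER_NEAREST)
iou = np.logical_and(warped, mask).sum() / np.logical_or(warped, mask).sum()
print("IoU between source and warped masks:", round(float(iou), 3))
</code></pre> </li>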
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tan%2C+Z">Zhaoxuan Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zinan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Q">Qingkai Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+Z">Zhenyu Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zheyuan Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Mo%2C+F">Fengran Mo</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+M">Meng Jiang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.13391v1-abstract-short" style="display: inline;"> Large Language Models (LLMs) excel in various tasks, including personalized recommendations. Existing evaluation methods often focus on rating prediction, relying on regression errors between actual and predicted ratings. However, user rating bias and item quality, two influential factors behind rating scores, can obscure personal preferences in user-item pair data. To address this, we introduce P&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13391v1-abstract-full').style.display = 'inline'; document.getElementById('2501.13391v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.13391v1-abstract-full" style="display: none;"> Large Language Models (LLMs) excel in various tasks, including personalized recommendations. Existing evaluation methods often focus on rating prediction, relying on regression errors between actual and predicted ratings. However, user rating bias and item quality, two influential factors behind rating scores, can obscure personal preferences in user-item pair data. To address this, we introduce PerRecBench, disassociating the evaluation from these two factors and assessing recommendation techniques on capturing the personal preferences in a grouped ranking manner. We find that the LLM-based recommendation techniques that are generally good at rating prediction fail to identify users&#39; favored and disfavored items when the user rating bias and item quality are eliminated by grouping users. With PerRecBench and 19 LLMs, we find that while larger models generally outperform smaller ones, they still struggle with personalized recommendation. Our findings reveal the superiority of pairwise and listwise ranking approaches over pointwise ranking, PerRecBench&#39;s low correlation with traditional regression metrics, the importance of user profiles, and the role of pretraining data distributions. We further explore three supervised fine-tuning strategies, finding that merging weights from single-format training is promising but improving LLMs&#39; understanding of user preferences remains an open research problem. Code and data are available at https://github.com/TamSiuhin/PerRecBench <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.13391v1-abstract-full').style.display = 'none'; document.getElementById('2501.13391v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11592">arXiv:2501.11592</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.11592">pdf</a>, <a href="https://arxiv.org/format/2501.11592">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Training-free Ultra Small Model for Universal Sparse Reconstruction in Compressed Sensing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Tang%2C+C">Chaoqing Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhuang%2C+H">Huanze Zhuang</a>, <a href="/search/cs?searchtype=author&amp;query=Tian%2C+G">Guiyun Tian</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhenli Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Ding%2C+Y">Yi Ding</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+W">Wenzhong Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Bai%2C+X">Xiang Bai</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.11592v2-abstract-short" style="display: inline;"> Pre-trained large models attract widespread attention in recent years, but they face challenges in applications that require high interpretability or have limited resources, such as physical sensing, medical imaging, and bioinformatics. Compressed Sensing (CS) is a well-proved theory that drives many recent breakthroughs in these applications. However, as a typical under-determined linear system,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11592v2-abstract-full').style.display = 'inline'; document.getElementById('2501.11592v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.11592v2-abstract-full" style="display: none;"> Pre-trained large models attract widespread attention in recent years, but they face challenges in applications that require high interpretability or have limited resources, such as physical sensing, medical imaging, and bioinformatics. Compressed Sensing (CS) is a well-proved theory that drives many recent breakthroughs in these applications. However, as a typical under-determined linear system, CS suffers from excessively long sparse reconstruction times when using traditional iterative methods, particularly with large-scale data. Current AI methods like deep unfolding fail to substitute them because pre-trained models exhibit poor generality beyond their training conditions and dataset distributions, or lack interpretability. Instead of following the big model fervor, this paper proposes ultra-small artificial neural models called coefficients learning (CL), enabling training-free and rapid sparse reconstruction while perfectly inheriting the generality and interpretability of traditional iterative methods, bringing new feature of incorporating prior knowledges. In CL, a signal of length $n$ only needs a minimal of $n$ trainable parameters. 
A case study model called CLOMP is implemented for evaluation. Experiments are conducted on both synthetic and real one-dimensional and two-dimensional signals, demonstrating significant improvements in efficiency and accuracy. Compared to representative iterative methods, CLOMP improves efficiency by 100 to 1000 folds for large-scale data. Test results on eight diverse image datasets indicate that CLOMP improves structural similarity index by 292%, 98%, 45% for sampling rates of 0.1, 0.3, 0.5, respectively. We believe this method can truly usher CS reconstruction into the AI era, benefiting countless under-determined linear systems that rely on sparse solution. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11592v2-abstract-full').style.display = 'none'; document.getElementById('2501.11592v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.11216">arXiv:2501.11216</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2501.11216">pdf</a>, <a href="https://arxiv.org/format/2501.11216">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Databases">cs.DB</span> </div> </div> <p class="title is-5 mathjax"> TigerVector: Supporting Vector Search in Graph Databases for Advanced RAGs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liu%2C+S">Shige Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhifang Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Li Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Ainihaer%2C+A">Adil Ainihaer</a>, <a href="/search/cs?searchtype=author&amp;query=Ramasami%2C+A">Arun Ramasami</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+S">Songting Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Y">Yu Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Wu%2C+M">Mingxi Wu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jianguo Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2501.11216v1-abstract-short" style="display: inline;"> In this paper, we introduce TigerVector, a system that integrates vector search and graph query within TigerGraph, a Massively Parallel Processing (MPP) native graph database. We extend the vertex attribute type with the embedding type. To support fast vector search, we devise an MPP index framework that interoperates efficiently with the graph engine. 
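<p class="is-size-7">The core idea, treating the $n$ coefficients themselves as the only trainable parameters and fitting them per signal, is in the spirit of classic proximal descent. The sketch below uses ISTA-style updates and is not CLOMP itself, whose OMP-style structure and exact updates differ:</p> <pre><code># Per-signal "coefficients learning": the n entries of x are the only
# parameters, fitted to m compressed measurements by proximal gradient
# descent with an l1 penalty (ISTA). No pre-training, no dataset.
import numpy as np

rng = np.random.default_rng(0)
n, m, k = 256, 64, 5
A = rng.standard_normal((m, n)) / np.sqrt(m)    # sensing matrix
x_true = np.zeros(n)
x_true[rng.choice(n, k, replace=False)] = rng.standard_normal(k)
y = A @ x_true                                  # compressed measurements

x, lr, lam = np.zeros(n), 0.05, 0.01
for _ in range(3000):
    x = x - lr * (A.T @ (A @ x - y))                         # gradient step
    x = np.sign(x) * np.maximum(np.abs(x) - lr * lam, 0.0)   # soft threshold
print("relative error:", np.linalg.norm(x - x_true) / np.linalg.norm(x_true))
</code></pre> </li>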
The graph query language GSQL is enhanced to&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11216v1-abstract-full').style.display = 'inline'; document.getElementById('2501.11216v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2501.11216v1-abstract-full" style="display: none;"> In this paper, we introduce TigerVector, a system that integrates vector search and graph query within TigerGraph, a Massively Parallel Processing (MPP) native graph database. We extend the vertex attribute type with the embedding type. To support fast vector search, we devise an MPP index framework that interoperates efficiently with the graph engine. The graph query language GSQL is enhanced to support vector type expressions and enable query compositions between vector search results and graph query blocks. These advancements elevate the expressive power and analytical capabilities of graph databases, enabling seamless fusion of unstructured and structured data in ways previously unattainable. Through extensive experiments, we demonstrate TigerVector&#39;s hybrid search capability, scalability, and superior performance compared to other graph databases (including Neo4j and Amazon Neptune) and a highly optimized specialized vector database (Milvus). TigerVector was integrated into TigerGraph v4.2, the latest release of TigerGraph, in December 2024. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.11216v1-abstract-full').style.display = 'none'; document.getElementById('2501.11216v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. 
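<p>The hybrid capability described above can be pictured, outside the database, as vector scoring composed with a graph-side filter. This is a toy NumPy sketch of the concept only; TigerVector's actual mechanism is an MPP vector index plus GSQL query composition inside TigerGraph:</p>
<pre><code>import numpy as np

def hybrid_search(embeddings, adjacency, query, k=5, anchor=None):
    # Cosine-score every vertex embedding against the query vector.
    sims = embeddings @ query / (
        np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query) + 1e-12)
    if anchor is not None:
        # Graph-side restriction: keep only one-hop neighbors of `anchor`.
        sims = np.where(adjacency[anchor] > 0, sims, -np.inf)
    return np.argsort(-sims)[:k]
</code></pre>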
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.21151">arXiv:2412.21151</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.21151">pdf</a>, <a href="https://arxiv.org/format/2412.21151">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.LG (Machine Learning)</span> <span class="tag is-small">cs.AI (Artificial Intelligence)</span> </div>
</div>
<p class="title is-5 mathjax"> PyG-SSL: A Graph Self-Supervised Learning Toolkit </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zheng%2C+L">Lecheng Zheng</a>, <a href="/search/cs?searchtype=author&amp;query=Jing%2C+B">Baoyu Jing</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zihao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhichen Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+T">Tianxin Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Ai%2C+M">Mengting Ai</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+X">Xinrui He</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+L">Lihui Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Fu%2C+D">Dongqi Fu</a>, <a href="/search/cs?searchtype=author&amp;query=You%2C+J">Jiaxuan You</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+H">Hanghang Tong</a>, <a href="/search/cs?searchtype=author&amp;query=He%2C+J">Jingrui He</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Graph Self-Supervised Learning (SSL) has emerged as a pivotal area of research in recent years. By engaging in pretext tasks to learn the intricate topological structures and properties of graphs using unlabeled data, these graph SSL models achieve enhanced performance, improved generalization, and heightened robustness. Despite the remarkable achievements of these graph SSL methods, their current implementations pose significant challenges for beginners and practitioners: the complex nature of graph structures, inconsistent evaluation metrics, and concerns regarding reproducibility hinder further progress in this field. Given the growing interest within the research community, there is an urgent need for a comprehensive, beginner-friendly, and accessible toolkit consisting of the most representative graph SSL algorithms. To address these challenges, we present a graph SSL toolkit named PyG-SSL, which is built upon PyTorch and is compatible with various deep learning and scientific computing backends. Within the toolkit, we offer a unified framework encompassing dataset loading, hyper-parameter configuration, model training, and comprehensive performance evaluation for diverse downstream tasks. Moreover, we provide beginner-friendly tutorials and the best hyper-parameters of each graph SSL algorithm on different graph datasets, facilitating the reproduction of results. The GitHub repository of the library is https://github.com/iDEA-iSAIL-Lab-UIUC/pyg-ssl. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
</li>
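<p>A sketch of what using such a toolkit typically looks like. The module, class, and function names below are hypothetical placeholders, not PyG-SSL's documented API; consult the linked repository for the real interface:</p>
<pre><code># Hypothetical usage sketch -- names are illustrative, not PyG-SSL's real API.
from pyg_ssl import load_dataset, GraphCL, evaluate  # hypothetical imports

data = load_dataset("Cora")                  # dataset loading
model = GraphCL(hidden_dim=256)              # a representative graph SSL model
model.fit(data, epochs=200, lr=1e-3)         # pretext-task pre-training
print(evaluate(model, data, task="node_classification"))
</code></pre>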
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.19282">arXiv:2412.19282</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.19282">pdf</a>, <a href="https://arxiv.org/format/2412.19282">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.CV (Computer Vision and Pattern Recognition)</span> </div>
</div>
<p class="title is-5 mathjax"> Reflective Gaussian Splatting </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yao%2C+Y">Yuxuan Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zixuan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+C">Chun Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+X">Xiatian Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Li Zhang</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Novel view synthesis has experienced significant advancements owing to increasingly capable NeRF- and 3DGS-based methods. However, reflective object reconstruction remains challenging, lacking a proper solution that achieves real-time, high-quality rendering while accommodating inter-reflection. To fill this gap, we introduce a Reflective Gaussian splatting (Ref-Gaussian) framework characterized by two components: (I) physically based deferred rendering, which empowers the rendering equation with pixel-level material properties via a split-sum approximation formulation; (II) Gaussian-grounded inter-reflection, which realizes the desired inter-reflection function within a Gaussian splatting paradigm for the first time. To enhance geometry modeling, we further introduce material-aware normal propagation and an initial per-Gaussian shading stage, along with 2D Gaussian primitives. Extensive experiments on standard datasets demonstrate that Ref-Gaussian surpasses existing approaches in terms of quantitative metrics, visual quality, and compute efficiency. Further, we show that our method serves as a unified solution for both reflective and non-reflective scenes, going beyond previous alternatives that focus only on reflective scenes. Also, we illustrate that Ref-Gaussian supports more applications such as relighting and editing. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted for ICLR 2025</span> </p>
</li>
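<p>The split-sum approximation invoked by component (I) above is commonly written as the following factorization of the specular integral (standard real-time rendering notation, not taken from the paper itself):</p>
<p class="mathjax">$$\int_{\Omega} L_i(\mathbf{l})\, f(\mathbf{l},\mathbf{v})\,(\mathbf{n}\cdot\mathbf{l})\,\mathrm{d}\mathbf{l} \;\approx\; \Big(\frac{1}{N}\sum_{k=1}^{N} L_i(\mathbf{l}_k)\Big)\Big(\frac{1}{N}\sum_{k=1}^{N} \frac{f(\mathbf{l}_k,\mathbf{v})\,(\mathbf{n}\cdot\mathbf{l}_k)}{p(\mathbf{l}_k)}\Big),$$ where the first factor is a pre-filterable environment-light term and the second a pre-integrable BRDF term.</p>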
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.19048">arXiv:2412.19048</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.19048">pdf</a>, <a href="https://arxiv.org/format/2412.19048">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.IR (Information Retrieval)</span> </div>
</div>
<p class="title is-5 mathjax"> Jasper and Stella: distillation of SOTA embedding models </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+D">Dun Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jiacheng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Ziyang Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+F">Fulong Wang</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: A crucial component in many deep learning applications, such as Frequently Asked Questions (FAQ) and Retrieval-Augmented Generation (RAG), is dense retrieval. In this process, embedding models transform raw text into numerical vectors. However, the embedding models that currently excel on text embedding benchmarks, like the Massive Text Embedding Benchmark (MTEB), often have numerous parameters and high vector dimensionality. This poses challenges for their application in real-world scenarios. To address this issue, we propose a novel multi-stage distillation framework that enables a smaller student embedding model to distill multiple larger teacher embedding models through three carefully designed losses. Meanwhile, we utilize Matryoshka Representation Learning (MRL) to effectively reduce the vector dimensionality of the student embedding model. Our student model, named Jasper, has 2 billion parameters, is built upon the Stella embedding model, and obtained the No. 3 position on the MTEB leaderboard (as of December 24, 2024), achieving an average score of 71.54 across 56 datasets. We have released the model and data on the Hugging Face Hub (https://huggingface.co/infgrad/jasper_en_vision_language_v1) (https://huggingface.co/datasets/infgrad/jasper_text_distill_dataset), and the training code is available in this project repository (https://github.com/NLPJCL/RAG-Retrieval). </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">7 pages, 1 figure</span> </p>
</li>
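<p>The MRL idea mentioned above trains nested prefixes of one embedding so that truncated vectors remain usable. A minimal PyTorch sketch under that reading; the paper's three distillation losses are more elaborate, and the cosine objective here is only a stand-in:</p>
<pre><code>import torch.nn.functional as F

def matryoshka_cosine_loss(student, teacher, dims=(64, 128, 256, 512)):
    # Apply the same cosine-distillation objective to nested embedding
    # prefixes so each truncated student vector stays aligned with the teacher.
    loss = 0.0
    for d in dims:
        s = F.normalize(student[:, :d], dim=-1)
        t = F.normalize(teacher[:, :d], dim=-1)
        loss = loss + (1 - (s * t).sum(dim=-1)).mean()
    return loss / len(dims)
</code></pre>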
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18419">arXiv:2412.18419</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.18419">pdf</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.AI (Artificial Intelligence)</span> </div>
</div>
<p class="title is-5 mathjax"> Research on the Proximity Relationships of Psychosomatic Disease Knowledge Graph Modules Extracted by Large Language Models </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Z">Zihan Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Ziyi Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Jiang%2C+W">Wenhao Jiang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+Y">Yihui Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Mao%2C+J">Jiaxin Mao</a>, <a href="/search/cs?searchtype=author&amp;query=Yuan%2C+Y">Yonggui Yuan</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+M">Min Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+S">Shubin Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+M">Mengyu Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yunqian Chen</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: As social changes accelerate, the incidence of psychosomatic disorders has significantly increased, becoming a major challenge in global health issues. This necessitates an innovative knowledge system and analytical methods to aid in diagnosis and treatment. Here, we establish the ontology model and entity types, using the BERT model and a LoRA-tuned LLM for named entity recognition, constructing the knowledge graph with 9668 triples. Next, by analyzing the network distances between disease, symptom, and drug modules, it was found that closer network distances among diseases can predict greater similarities in their clinical manifestations, treatment approaches, and psychological mechanisms, and closer distances between symptoms indicate that they are more likely to co-occur. Lastly, by comparing the proximity d and the proximity z-score, it was shown that symptom-disease pairs in primary diagnostic relationships have a stronger association and are of higher referential value than those in diagnostic relationships. The research results revealed the potential connections between diseases, co-occurring symptoms, and similarities in treatment strategies, providing new perspectives for the diagnosis and treatment of psychosomatic disorders and valuable information for future mental health research and practice. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
</li>
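<p>The proximity measures named above are conventionally defined in the network-medicine literature (following Menche et al.; the paper's exact variant is not stated in the abstract) as</p>
<p class="mathjax">$$d_{AB}=\frac{1}{\lVert A\rVert+\lVert B\rVert}\Big(\sum_{a\in A}\min_{b\in B} d(a,b)+\sum_{b\in B}\min_{a\in A} d(a,b)\Big),\qquad z=\frac{d_{AB}-\bar d_{\mathrm{rand}}}{\sigma_{\mathrm{rand}}},$$ where $d(a,b)$ is the shortest-path distance between nodes of modules $A$ and $B$, and the $z$-score compares $d_{AB}$ against degree-preserving random node sets.</p>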
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.17848">arXiv:2412.17848</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.17848">pdf</a>, <a href="https://arxiv.org/format/2412.17848">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.CL (Computation and Language)</span> </div>
</div>
<p class="title is-5 mathjax"> Overview of the 2024 ALTA Shared Task: Detect Automatic AI-Generated Sentences for Human-AI Hybrid Articles </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Moll%C3%A1%2C+D">Diego Mollá</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Q">Qiongkai Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zijie Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhuang Li</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: The ALTA shared tasks have been running annually since 2010. In 2024, the purpose of the task is to detect machine-generated text in a hybrid setting where the text may contain portions of human-written text and portions of machine-generated text. In this paper, we present the task, the evaluation criteria, and the results of the systems participating in the shared task. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 3 tables, published in ALTA 2024</span> </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association, 2024 </p>
</li>
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.17351">arXiv:2412.17351</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.17351">pdf</a>, <a href="https://arxiv.org/format/2412.17351">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.SI (Social and Information Networks)</span> <span class="tag is-small">physics.soc-ph (Physics and Society)</span> </div>
</div>
<p class="title is-5 mathjax"> The evolution of cooperation in spatial public goods game with tolerant punishment based on reputation threshold </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+G">Gui Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+Y">Yichao Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Ziyan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Feng%2C+M">Minyu Feng</a>, <a href="/search/cs?searchtype=author&amp;query=Chica%2C+M">Manuel Chica</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Reputation and punishment are significant guidelines for regulating individual behavior in human society, and those with a good reputation are more likely to be imitated by others. In addition, society imposes varying degrees of punishment for behaviors that harm the interests of groups with different reputations. However, conventional pairwise interaction rules and punishment mechanisms overlook this aspect. Building on this observation, this paper enhances a spatial public goods game in two key ways: 1) we set a reputation threshold and use punishment to regulate the defection behavior of players in low-reputation groups while allowing defection in high-reputation game groups; 2) unlike conventional pairwise interaction rules, we combine reputation and payoff as the fitness of individuals to ensure that players with both high payoff and high reputation have a higher chance of being imitated (a common form of this rule is sketched after this entry). Through simulations, we find that a higher reputation threshold, combined with a stringent punishment environment, can substantially enhance the level of cooperation within the population. This mechanism provides deeper insight into the widespread phenomenon of cooperation that emerges among individuals. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
</li>
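<p>A common instantiation of the combined fitness and imitation rule in spatial public goods models (the paper's exact weighting is not given in the abstract) is</p>
<p class="mathjax">$$F_i=(1-\alpha)\,\Pi_i+\alpha\,R_i,\qquad W(s_i\leftarrow s_j)=\frac{1}{1+\exp\!\big[(F_i-F_j)/K\big]},$$ where $\Pi_i$ is payoff, $R_i$ reputation, $\alpha$ a mixing weight, and $K$ the selection noise in the Fermi imitation rule.</p>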
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16553">arXiv:2412.16553</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.16553">pdf</a>, <a href="https://arxiv.org/format/2412.16553">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.CV (Computer Vision and Pattern Recognition)</span> </div>
</div>
<p class="title is-5 mathjax"> Semantics Prompting Data-Free Quantization for Low-Bit Vision Transformers </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhong%2C+Y">Yunshan Zhong</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yuyao Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yuxin Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shen Li</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Y">Yong Li</a>, <a href="/search/cs?searchtype=author&amp;query=Chao%2C+F">Fei Chao</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhanpeng Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Ji%2C+R">Rongrong Ji</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Data-free quantization (DFQ), which facilitates model quantization without real data to address increasing concerns about data security, has garnered significant attention within the model compression community. Recently, the unique architecture of vision transformers (ViTs) has driven the development of specialized DFQ techniques. However, we observe that the synthetic images from existing methods suffer from deficient semantics compared to real images, thereby compromising performance. Motivated by this, we propose SPDFQ, a Semantics Prompting Data-Free Quantization method for ViTs. First, SPDFQ incorporates Attention Priors Alignment (APA), which uses randomly generated attention priors to enhance the semantics of synthetic images. Second, SPDFQ introduces Multi-Semantic Reinforcement (MSR), which utilizes localized patch optimization to prompt efficient parameterization and diverse semantics in synthetic images. Finally, SPDFQ employs Softlabel Learning (SL), where soft learning targets are adapted to encourage more complex semantics and accommodate images augmented by MSR. Experimental results demonstrate that SPDFQ significantly outperforms existing methods. For instance, SPDFQ achieves a 15.52% increase in top-1 accuracy on ImageNet for W4A4 ViT-B. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
</li>
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16435">arXiv:2412.16435</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.16435">pdf</a>, <a href="https://arxiv.org/format/2412.16435">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.LG (Machine Learning)</span> <span class="tag is-small">cs.IR (Information Retrieval)</span> <span class="tag is-small">cs.SI (Social and Information Networks)</span> </div>
</div>
<p class="title is-5 mathjax"> THeGCN: Temporal Heterophilic Graph Convolutional Network </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yan%2C+Y">Yuchen Yan</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Y">Yuzhong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Huiyuan Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+X">Xiaoting Li</a>, <a href="/search/cs?searchtype=author&amp;query=Xu%2C+Z">Zhe Xu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhichen Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+L">Lihui Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+Z">Zhining Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Tong%2C+H">Hanghang Tong</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Graph Neural Networks (GNNs) have exhibited remarkable efficacy in diverse graph learning tasks, particularly on static homophilic graphs. Recent attention has pivoted towards more intricate structures, encompassing (1) static heterophilic graphs encountering the edge heterophily issue in the spatial domain and (2) event-based continuous graphs in the temporal domain. State-of-the-art (SOTA) methods have been concurrently addressing these two lines of work but tend to overlook the presence of heterophily in the temporal domain, constituting the temporal heterophily issue. Furthermore, we highlight that the edge heterophily issue and the temporal heterophily issue often co-exist in event-based continuous graphs, giving rise to the temporal edge heterophily challenge. To tackle this challenge, this paper first introduces the temporal edge heterophily measurement. Subsequently, we propose the Temporal Heterophilic Graph Convolutional Network (THeGCN), an innovative model that incorporates the low/high-pass graph signal filtering technique to accurately capture both edge (spatial) heterophily and temporal heterophily. Specifically, the THeGCN model consists of two key components: a sampler and an aggregator. The sampler selects events relevant to a node at a given moment. Then, the aggregator executes message-passing, encoding temporal information, node attributes, and edge attributes into node embeddings. Extensive experiments conducted on 5 real-world datasets validate the efficacy of THeGCN. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 20 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
</li>
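<p>The low/high-pass graph signal filtering named above is, generically, the following pair of operators on a node signal. This is a NumPy sketch of the generic technique; THeGCN's exact operators are defined in the paper:</p>
<pre><code>import numpy as np

def graph_filters(A, x, eps=0.5):
    # Symmetric normalized adjacency S = D^{-1/2} A D^{-1/2}
    # (assumes no isolated nodes, so all degrees are positive).
    d = A.sum(axis=1)
    S = A / np.sqrt(np.outer(d, d))
    low = eps * x + S @ x    # low-pass: smooth toward the neighborhood mean
    high = eps * x - S @ x   # high-pass: keep the contrast heterophilic edges carry
    return low, high
</code></pre>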
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.15867">arXiv:2412.15867</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.15867">pdf</a>, <a href="https://arxiv.org/format/2412.15867">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.CV (Computer Vision and Pattern Recognition)</span> </div>
</div>
<p class="title is-5 mathjax"> IRGS: Inter-Reflective Gaussian Splatting with 2D Gaussian Ray Tracing </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gu%2C+C">Chun Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Wei%2C+X">Xiaofei Wei</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zixuan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Yao%2C+Y">Yuxuan Yao</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+L">Li Zhang</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: In inverse rendering, accurately modeling visibility and indirect radiance for incident light is essential for capturing secondary effects. Due to the absence of a powerful Gaussian ray tracer, previous 3DGS-based methods have either adopted a simplified rendering equation or used learnable parameters to approximate incident light, resulting in inaccurate material and lighting estimations. To this end, we introduce inter-reflective Gaussian splatting (IRGS) for inverse rendering. To capture inter-reflection, we apply the full rendering equation without simplification and compute incident radiance on the fly using the proposed differentiable 2D Gaussian ray tracing. Additionally, we present an efficient optimization scheme to handle the computational demands of Monte Carlo sampling for rendering equation evaluation. Furthermore, we introduce a novel strategy for querying the indirect radiance of incident light when relighting the optimized scenes. Extensive experiments on multiple standard benchmarks validate the effectiveness of IRGS, demonstrating its capability to accurately model complex inter-reflection effects. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Project page: https://fudan-zvg.github.io/IRGS</span> </p>
</li>
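<p>The full rendering equation and Monte Carlo evaluation referred to above are the standard ones:</p>
<p class="mathjax">$$L_o(\mathbf{x},\omega_o)=\int_{\Omega} f_r(\mathbf{x},\omega_i,\omega_o)\,L_i(\mathbf{x},\omega_i)\,(\mathbf{n}\cdot\omega_i)\,\mathrm{d}\omega_i \;\approx\; \frac{1}{N}\sum_{k=1}^{N}\frac{f_r(\mathbf{x},\omega_k,\omega_o)\,L_i(\mathbf{x},\omega_k)\,(\mathbf{n}\cdot\omega_k)}{p(\omega_k)},$$ with IRGS computing the incident term $L_i$ on the fly via differentiable 2D Gaussian ray tracing, per the abstract.</p>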
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.15118">arXiv:2412.15118</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.15118">pdf</a>, <a href="https://arxiv.org/format/2412.15118">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.CL (Computation and Language)</span> <span class="tag is-small">cs.AI (Artificial Intelligence)</span> <span class="tag is-small">cs.LG (Machine Learning)</span> <span class="tag is-small">cs.SE (Software Engineering)</span> </div>
</div>
<p class="title is-5 mathjax"> Outcome-Refining Process Supervision for Code Generation </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yu%2C+Z">Zhuohao Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Gu%2C+W">Weizheng Gu</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Y">Yidong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhengran Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+J">Jindong Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Ye%2C+W">Wei Ye</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+S">Shikun Zhang</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Large Language Models have demonstrated remarkable capabilities in code generation, yet they often struggle with complex programming tasks that require deep algorithmic reasoning. While process supervision through learned reward models shows promise in guiding reasoning steps, it requires expensive training data and suffers from unreliable evaluation. We propose Outcome-Refining Process Supervision, a novel paradigm that treats outcome refinement itself as the process to be supervised. Our framework leverages concrete execution signals to ground the supervision of reasoning steps, while using tree-structured exploration to maintain multiple solution trajectories simultaneously. Experiments demonstrate that our approach enables even smaller models to achieve high success accuracy and performance metrics on competitive programming tasks, and creates more reliable verification than traditional reward models without requiring the training of PRMs. Our approach achieves significant improvements across 5 models and 3 datasets: an average increase of 26.9% in correctness and 42.2% in efficiency. The results suggest that providing a structured reasoning space with concrete verification signals is crucial for solving complex programming tasks. We open-source all our code and data at https://github.com/zhuohaoyu/ORPS. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 5 figures, Code: https://github.com/zhuohaoyu/ORPS</span> </p>
</li>
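<p>Execution-grounded tree search of the kind the abstract describes can be sketched as follows. Here <code>generate</code> and <code>run_tests</code> are stand-ins for an LLM proposer and a sandboxed test runner, and the real ORPS procedure (see the linked repository) is richer:</p>
<pre><code>def outcome_refining_search(generate, run_tests, width=4, depth=3, seed=""):
    # Maintain several candidate solutions; expand each with new refinements,
    # score children by concrete execution signals (test pass rate), and keep
    # only the most promising trajectories.
    frontier = [(0.0, seed)]
    for _ in range(depth):
        children = []
        for _, program in frontier:
            for candidate in generate(program, n=width):
                children.append((run_tests(candidate), candidate))
        frontier = sorted(children, key=lambda c: c[0], reverse=True)[:width]
    return frontier[0]  # (best pass rate, best program)
</code></pre>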
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.14686">arXiv:2412.14686</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.14686">pdf</a>, <a href="https://arxiv.org/format/2412.14686">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.CL (Computation and Language)</span> <span class="tag is-small">cs.AI (Artificial Intelligence)</span> </div>
</div>
<p class="title is-5 mathjax"> Each Fake News is Fake in its Own Way: An Attribution Multi-Granularity Benchmark for Multimodal Fake News Detection </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Guo%2C+H">Hao Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+Z">Zihan Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhi Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Luo%2C+M">Minnan Luo</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+W">Weixin Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Tang%2C+J">Jiuyang Tang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+X">Xiang Zhao</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Social platforms, while facilitating access to information, have also become saturated with a plethora of fake news, resulting in negative consequences. Automatic multimodal fake news detection is a worthwhile pursuit. Existing multimodal fake news datasets only provide binary labels of real or fake. However, real news is alike, while each fake news is fake in its own way. These datasets fail to reflect the mixed nature of various types of multimodal fake news. To bridge the gap, we construct an attributing multi-granularity multimodal fake news detection dataset, AMG, revealing the inherent fake pattern. Furthermore, we propose a multi-granularity clue alignment model to achieve multimodal fake news detection and attribution. Experimental results demonstrate that AMG is a challenging dataset, and its attribution setting opens up new avenues for future research. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
</li>
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.14135">arXiv:2412.14135</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.14135">pdf</a>, <a href="https://arxiv.org/format/2412.14135">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.AI (Artificial Intelligence)</span> <span class="tag is-small">cs.LG (Machine Learning)</span> </div>
</div>
<p class="title is-5 mathjax"> Scaling of Search and Learning: A Roadmap to Reproduce o1 from Reinforcement Learning Perspective </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhiyuan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Cheng%2C+Q">Qinyuan Cheng</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+Z">Zhangyue Yin</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+B">Bo Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+S">Shimin Li</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+Y">Yunhua Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Q">Qipeng Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+X">Xuanjing Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+X">Xipeng Qiu</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: OpenAI o1 represents a significant milestone in Artificial Intelligence, achieving expert-level performance on many challenging tasks that require strong reasoning ability. OpenAI has claimed that the main technique behind o1 is reinforcement learning. Recent works use alternative approaches like knowledge distillation to imitate o1's reasoning style, but their effectiveness is limited by the capability ceiling of the teacher model. Therefore, this paper analyzes the roadmap to achieving o1 from the perspective of reinforcement learning, focusing on four key components: policy initialization, reward design, search, and learning. Policy initialization enables models to develop human-like reasoning behaviors, equipping them with the ability to effectively explore solution spaces for complex problems. Reward design provides dense and effective signals via reward shaping or reward modeling, which guide both search and learning. Search plays a crucial role in generating high-quality solutions during both training and testing phases, producing better solutions with more computation. Learning utilizes the data generated by search to improve the policy, achieving better performance with more parameters and more searched data. Existing open-source projects that attempt to reproduce o1 can be seen as parts or variants of our roadmap. Collectively, these components underscore how learning and search drive o1's advancement, making meaningful contributions to the development of LLMs. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
</li>
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.13735">arXiv:2412.13735</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.13735">pdf</a>, <a href="https://arxiv.org/format/2412.13735">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.CV (Computer Vision and Pattern Recognition)</span> </div>
</div>
<p class="title is-5 mathjax"> 3D Registration in 30 Years: A Survey </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Yang%2C+J">Jiaqi Yang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+C">Chu'ai Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+Z">Zhengbao Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Cao%2C+X">Xinyue Cao</a>, <a href="/search/cs?searchtype=author&amp;query=Ouyang%2C+X">Xuan Ouyang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+X">Xiyu Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhenxuan Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhao Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+B">Borui Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Xia%2C+Z">Zhiyi Xia</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Q">Qian Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yulan Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Y">Yanning Zhang</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: 3D point cloud registration is a fundamental problem in computer vision, computer graphics, robotics, remote sensing, and beyond. Over the last thirty years, we have witnessed remarkable advancements in this area, with numerous kinds of solutions. Although a handful of relevant surveys have been conducted, their coverage is still limited. In this work, we present a comprehensive survey on 3D point cloud registration, covering a set of sub-areas such as pairwise coarse registration, pairwise fine registration, multi-view registration, cross-scale registration, and multi-instance registration. The datasets, evaluation metrics, method taxonomy, discussions of merits and demerits, and insights into future directions are comprehensively presented in this survey. The regularly updated project page of the survey is available at https://github.com/Amyyyy11/3D-Registration-in-30-Years-A-Survey. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
</li>
<li class="arxiv-result">
<div class="is-marginless">
<p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.11063">arXiv:2412.11063</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.11063">pdf</a>, <a href="https://arxiv.org/format/2412.11063">other</a>]&nbsp;</span></p>
<div class="tags is-inline-block"> <span class="tag is-small">cs.AI (Artificial Intelligence)</span> <span class="tag is-small">cs.CL (Computation and Language)</span> <span class="tag is-small">cs.SE (Software Engineering)</span> </div>
</div>
<p class="title is-5 mathjax"> LAW: Legal Agentic Workflows for Custody and Fund Services Contracts </p>
<p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Watson%2C+W">William Watson</a>, <a href="/search/cs?searchtype=author&amp;query=Cho%2C+N">Nicole Cho</a>, <a href="/search/cs?searchtype=author&amp;query=Srishankar%2C+N">Nishan Srishankar</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhen Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Cecchi%2C+L">Lucas Cecchi</a>, <a href="/search/cs?searchtype=author&amp;query=Scott%2C+D">Daniel Scott</a>, <a href="/search/cs?searchtype=author&amp;query=Siddagangappa%2C+S">Suchetha Siddagangappa</a>, <a href="/search/cs?searchtype=author&amp;query=Kaur%2C+R">Rachneet Kaur</a>, <a href="/search/cs?searchtype=author&amp;query=Balch%2C+T">Tucker Balch</a>, <a href="/search/cs?searchtype=author&amp;query=Veloso%2C+M">Manuela Veloso</a> </p>
<p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: Legal contracts in the custody and fund services domain govern critical aspects such as key provider responsibilities, fee schedules, and indemnification rights. However, it is challenging for an off-the-shelf Large Language Model (LLM) to ingest these contracts due to the lengthy unstructured streams of text, limited LLM context windows, and complex legal jargon. To address these challenges, we introduce LAW (Legal Agentic Workflows for Custody and Fund Services Contracts). LAW features a modular design that responds to user queries by orchestrating a suite of domain-specific tools and text agents. Our experiments demonstrate that LAW, by integrating multiple specialized agents and tools, significantly outperforms the baseline. LAW excels particularly in complex tasks such as calculating a contract's termination date, surpassing the baseline by 92.9 percentage points. Furthermore, LAW offers a cost-effective alternative to traditional fine-tuned legal LLMs by leveraging reusable, domain-specific tools. </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p>
<p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at The 31st International Conference on Computational Linguistics (COLING 2025)</span> </p>
</li>
However, it is challenging for an off-the-shelf Large Language Model (LLM) to ingest these contracts due to the lengthy unstructured streams of text, limited LLM context windows, and complex legal jargon. To address these challenges, we introduce LAW (Legal Agentic Workflows for Custody and Fund Services Contracts). LAW features a modular design that responds to user queries by orchestrating a suite of domain-specific tools and text agents. Our experiments demonstrate that LAW, by integrating multiple specialized agents and tools, significantly outperforms the baseline. LAW excels particularly in complex tasks such as calculating a contract&#39;s termination date, surpassing the baseline by 92.9% points. Furthermore, LAW offers a cost-effective alternative to traditional fine-tuned legal LLMs by leveraging reusable, domain-specific tools. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.11063v1-abstract-full').style.display = 'none'; document.getElementById('2412.11063v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted at The 31st International Conference on Computational Linguistics (COLING 2025)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.10087">arXiv:2412.10087</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.10087">pdf</a>, <a href="https://arxiv.org/format/2412.10087">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Consensus-Based Dynamic Task Allocation for Multi-Robot System Considering Payloads Consumption </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Qiu%2C+X">Xuekai Qiu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhu%2C+P">Pengming Zhu</a>, <a href="/search/cs?searchtype=author&amp;query=Hu%2C+Y">Yiming Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Zeng%2C+Z">Zhiwen Zeng</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+H">Huimin Lu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.10087v1-abstract-short" style="display: inline;"> This paper presents a consensus-based payload algorithm (CBPA) to deal with the condition of robots&#39; capability decrease for multi-robot task allocation. During the execution of complex tasks, robots&#39; capabilities could decrease with the consumption of payloads, which causes a problem that the robot coalition would not meet the tasks&#39; requirements in real time. 
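The tool-orchestration pattern the LAW abstract describes is easy to prototype. Below is a minimal Python sketch of query routing to domain-specific tools; the tool names, keyword-based dispatch, and notice-period rule are illustrative assumptions, not the paper's implementation:

    from datetime import date, timedelta

    # Hypothetical domain-specific tools; LAW's actual tools are not described in detail.
    def termination_date(effective: date, notice_days: int) -> date:
        # Toy rule: termination = effective date plus the notice period.
        return effective + timedelta(days=notice_days)

    def fee_schedule(_query: str) -> str:
        return "fee-schedule extraction would run here"

    TOOLS = {
        "termination": lambda q: termination_date(date(2024, 1, 1), 90),
        "fees": fee_schedule,
    }

    def route(query: str):
        # Keyword routing stands in for LLM-based agent dispatch.
        for key, tool in TOOLS.items():
            if key in query.lower():
                return tool(query)
        return "fall back to a general retrieval agent"

    print(route("What is the termination date of the contract?"))

In a real agentic workflow the router itself would be an LLM call; the point of the sketch is that tools stay reusable and deterministic while only dispatch is learned.
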
arXiv:2412.10087 (https://arxiv.org/abs/2412.10087) [pdf, other]
Subjects: Robotics (cs.RO)
Title: Consensus-Based Dynamic Task Allocation for Multi-Robot System Considering Payloads Consumption
Authors: Xuekai Qiu, Pengming Zhu, Yiming Hu, Zhiwen Zeng, Huimin Lu
Abstract: This paper presents a consensus-based payload algorithm (CBPA) to handle declining robot capabilities in multi-robot task allocation. During the execution of complex tasks, robot capabilities can decrease as payloads are consumed, so a robot coalition may no longer meet task requirements in real time. The proposed CBPA is an enhanced version of the consensus-based bundle algorithm (CBBA) and comprises two core phases: payload bundle construction and consensus. In the payload bundle construction phase, CBPA introduces a payload assignment matrix to track, in real time, the payloads carried by the robots and the demands of multi-robot tasks. Robots then share their payload assignment matrices in the consensus phase. These two phases are iterated to dynamically adjust how many robots perform each multi-robot task and how many tasks each robot performs, producing conflict-free assignments that ensure the coalition meets the demand and completes all tasks as quickly as possible. Physical experiments show that CBPA is well suited to complex, dynamic scenarios in which robots must collaborate and task requirements are tightly coupled to the robots' payloads. Numerical experiments show that CBPA achieves higher total task gains than CBBA.
Submitted 13 December, 2024; originally announced December 2024.

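To make the payload-assignment-matrix idea concrete, here is a toy greedy allocator in Python that tracks payload consumption against task demands. It is not the authors' CBPA (which is auction- and consensus-based across distributed robots); the gains, demands, and payload numbers are invented:

    import numpy as np

    R, T = 3, 4                           # robots, tasks
    payload = np.array([5., 4., 6.])      # payload units each robot carries
    demand = np.array([3., 2., 4., 5.])   # payload units each task needs
    gain = np.random.default_rng(0).uniform(1, 10, (R, T))  # robot-task gains

    assign = np.zeros((R, T))             # payload assignment matrix
    while demand.sum() > 0 and payload.sum() > 0:
        # Centralized stand-in for consensus: keep the highest-gain feasible bid.
        feasible = (payload[:, None] > 0) & (demand[None, :] > 0)
        if not feasible.any():
            break
        r, t = np.unravel_index(np.where(feasible, gain, -np.inf).argmax(), (R, T))
        amount = min(payload[r], demand[t])
        assign[r, t] += amount            # robot r commits payload to task t
        payload[r] -= amount              # capability decreases with consumption
        demand[t] -= amount
    print(assign)

The matrix `assign` plays the role the abstract gives the payload assignment matrix: it records, per robot and task, how much capability has been committed, so a coalition's remaining capacity can be checked in real time.
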
arXiv:2412.07200 (https://arxiv.org/abs/2412.07200) [pdf, other]
Subjects: Human-Computer Interaction (cs.HC); Artificial Intelligence (cs.AI); Computation and Language (cs.CL)
Title: Modifying AI, Enhancing Essays: How Active Engagement with Generative AI Boosts Writing Quality
Authors: Kaixun Yang, Mladen Raković, Zhiping Liang, Lixiang Yan, Zijie Zeng, Yizhou Fan, Dragan Gašević, Guanliang Chen
Abstract: Students are increasingly relying on Generative AI (GAI) to support their writing, a key pedagogical practice in education. In GAI-assisted writing, students can delegate core cognitive tasks (e.g., generating ideas and turning them into sentences) to GAI while still producing high-quality essays. This creates new challenges for teachers in assessing and supporting student learning, as they often lack insight into whether students are engaging in meaningful cognitive processes during writing or how much of the essay's quality can be attributed to those processes. This study aimed to help teachers better assess and support student learning in GAI-assisted writing by examining how different writing behaviors, especially those indicative of meaningful learning versus those that are not, impact essay quality. Using a dataset of 1,445 GAI-assisted writing sessions, we applied the X-Learner causal inference method to quantify the causal impact of three GAI-assisted writing behavioral patterns (seeking suggestions but not accepting them; seeking suggestions and accepting them as they are; and seeking suggestions and accepting them with modification) on four measures of essay quality (lexical sophistication, syntactic complexity, text cohesion, and linguistic bias). Our analysis showed that writers who frequently modified GAI-generated text, suggesting active engagement in higher-order cognitive processes, consistently improved the quality of their essays in terms of lexical sophistication, syntactic complexity, and text cohesion. In contrast, those who often accepted GAI-generated text without changes, primarily engaging in lower-order processes, saw a decrease in essay quality. Additionally, while human writers tend to introduce linguistic bias when writing independently, incorporating GAI-generated text, even without modification, can help mitigate this bias.
Submitted 10 December, 2024; originally announced December 2024.

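X-Learner is a standard causal meta-learner, so its three stages can be sketched compactly. The following Python example uses scikit-learn regressors on synthetic data (the dataset and learners are stand-ins, not the study's):

    import numpy as np
    from sklearn.ensemble import GradientBoostingRegressor as GBR
    from sklearn.linear_model import LogisticRegression

    rng = np.random.default_rng(0)
    X = rng.normal(size=(2000, 3))
    W = rng.binomial(1, 0.5, 2000)             # treatment, e.g. "modified GAI text"
    Y = X[:, 0] + W * (1 + X[:, 1]) + rng.normal(0, 0.1, 2000)  # outcome, e.g. quality

    # Stage 1: outcome models per treatment arm.
    mu0 = GBR().fit(X[W == 0], Y[W == 0])
    mu1 = GBR().fit(X[W == 1], Y[W == 1])
    # Stage 2: imputed individual effects, then effect models per arm.
    d1 = Y[W == 1] - mu0.predict(X[W == 1])
    d0 = mu1.predict(X[W == 0]) - Y[W == 0]
    tau1 = GBR().fit(X[W == 1], d1)
    tau0 = GBR().fit(X[W == 0], d0)
    # Stage 3: blend the two effect estimates with the propensity score.
    g = LogisticRegression().fit(X, W).predict_proba(X)[:, 1]
    cate = g * tau0.predict(X) + (1 - g) * tau1.predict(X)
    print(cate.mean())   # should be near the true average effect of 1
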
arXiv:2412.03897 (https://arxiv.org/abs/2412.03897) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV); Machine Learning (cs.LG)
DOI: 10.1109/TGRS.2024.3478385 (https://doi.org/10.1109/TGRS.2024.3478385)
Title: Multisource Collaborative Domain Generalization for Cross-Scene Remote Sensing Image Classification
Authors: Zhu Han, Ce Zhang, Lianru Gao, Zhiqiang Zeng, Michael K. Ng, Bing Zhang, Jocelyn Chanussot
Abstract: Cross-scene image classification aims to transfer prior knowledge of ground materials to annotate regions with different distributions and to reduce manual annotation cost in remote sensing. However, existing approaches focus on single-source domain generalization to unseen target domains and are easily confused by large real-world domain shifts, owing to limited training information and insufficient diversity-modeling capacity. To address this gap, we propose a novel multi-source collaborative domain generalization framework (MS-CDG) based on the homogeneity and heterogeneity characteristics of multi-source remote sensing data, which considers data-aware adversarial augmentation and model-aware multi-level diversification simultaneously to enhance cross-scene generalization performance. The data-aware adversarial augmentation uses an adversarial neural network with semantic guidance to generate multi-source samples by adaptively learning realistic channel and distribution changes across domains. For cross-domain and intra-domain modeling, the model-aware diversification transforms the shared spatial-channel features of multi-source data into a class-wise prototype and kernel mixture module, to address domain discrepancies and cluster different classes effectively. Finally, joint classification of original and augmented multi-source samples is performed, with a distribution consistency alignment introduced to increase model diversity and ensure better domain-invariant representation learning. Extensive experiments on three public multi-source remote sensing datasets demonstrate the superior performance of the proposed method against state-of-the-art methods.
Submitted 5 December, 2024; originally announced December 2024.

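A distribution consistency alignment between original and augmented samples can be sketched as a symmetric KL penalty on the two predictive distributions. This is a generic consistency loss assumed from the abstract, not necessarily the paper's exact formulation:

    import torch
    import torch.nn.functional as F

    def consistency_alignment(logits_orig, logits_aug):
        # Symmetric KL between predictions on original and augmented samples.
        p = F.log_softmax(logits_orig, dim=-1)
        q = F.log_softmax(logits_aug, dim=-1)
        kl_pq = F.kl_div(q, p.exp(), reduction="batchmean")  # KL(P || Q)
        kl_qp = F.kl_div(p, q.exp(), reduction="batchmean")  # KL(Q || P)
        return 0.5 * (kl_pq + kl_qp)

    loss = consistency_alignment(torch.randn(8, 10), torch.randn(8, 10))
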
arXiv:2412.03893 (https://arxiv.org/abs/2412.03893) [pdf, other]
Subjects: Image and Video Processing (eess.IV); Artificial Intelligence (cs.AI); Computer Vision and Pattern Recognition (cs.CV)
DOI: 10.1109/TGRS.2024.3418583 (https://doi.org/10.1109/TGRS.2024.3418583)
Title: Dual-Branch Subpixel-Guided Network for Hyperspectral Image Classification
Authors: Zhu Han, Jin Yang, Lianru Gao, Zhiqiang Zeng, Bing Zhang, Jocelyn Chanussot
Abstract: Deep learning (DL) has been widely applied to hyperspectral image (HSI) classification owing to its promising feature learning and representation capabilities. However, limited by the spatial resolution of sensors, existing DL-based classification approaches mainly focus on pixel-level spectral and spatial information extraction through complex network architecture design, while ignoring the existence of mixed pixels in actual scenarios. To tackle this difficulty, we propose a novel dual-branch subpixel-guided network for HSI classification, called DSNet, which automatically integrates subpixel information and convolutional class features by introducing a deep autoencoder unmixing architecture to enhance classification performance. DSNet is capable of fully considering physically nonlinear properties within subpixels and adaptively generating diagnostic abundances in an unsupervised manner to achieve more reliable decision boundaries for class label distributions. The subpixel fusion module is designed to ensure high-quality information fusion across pixel and subpixel features, further promoting stable joint classification. Experimental results on three benchmark datasets demonstrate the effectiveness and superiority of DSNet compared with state-of-the-art DL-based HSI classification approaches. The code will be available at https://github.com/hanzhu97702/DSNet, contributing to the remote sensing community.
Submitted 5 December, 2024; originally announced December 2024.

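The subpixel branch's autoencoder unmixing can be sketched as a linear-mixture autoencoder whose bottleneck produces nonnegative, sum-to-one abundances. This is a standard unmixing-autoencoder pattern; the layer sizes and the softmax constraint are assumptions, not DSNet's exact architecture:

    import torch
    import torch.nn as nn

    class UnmixingAE(nn.Module):
        def __init__(self, bands=200, endmembers=5):
            super().__init__()
            self.encoder = nn.Sequential(
                nn.Linear(bands, 64), nn.ReLU(), nn.Linear(64, endmembers))
            # Decoder weights play the role of the endmember spectra.
            self.decoder = nn.Linear(endmembers, bands, bias=False)

        def forward(self, x):
            # Softmax enforces nonnegative abundances that sum to one.
            abund = torch.softmax(self.encoder(x), dim=-1)
            return self.decoder(abund), abund

    recon, abund = UnmixingAE()(torch.rand(16, 200))
    # `abund` is the per-pixel subpixel information that a fusion module
    # would combine with the CNN branch's class features.
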
arXiv:2412.02807 (https://arxiv.org/abs/2412.02807) [pdf, other]
Subjects: Systems and Control (eess.SY); Machine Learning (cs.LG); Dynamical Systems (math.DS)
Title: Learning Koopman-based Stability Certificates for Unknown Nonlinear Systems
Authors: Ruikun Zhou, Yiming Meng, Zhexuan Zeng, Jun Liu
Abstract: Koopman operator theory has gained significant attention in recent years for identifying discrete-time nonlinear systems by embedding them into an infinite-dimensional linear vector space. However, providing stability guarantees while learning the continuous-time dynamics, especially under conditions of relatively low observation frequency, remains a challenge within existing Koopman-based learning frameworks. To address this challenge, we propose an algorithmic framework to simultaneously learn the vector field and Lyapunov functions for unknown nonlinear systems, using a limited amount of data sampled across the state space and along trajectories at a relatively low sampling frequency. The proposed framework builds upon recently developed high-accuracy Koopman generator learning for capturing transient system transitions and on physics-informed neural networks for training Lyapunov functions. We show that the learned Lyapunov functions can be formally verified using a satisfiability modulo theories (SMT) solver and provide less conservative estimates of the region of attraction than existing methods.
Submitted 3 December, 2024; originally announced December 2024.
Comments: Submitted to L4DC 2025

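The pipeline pairs a data-driven vector-field estimate with a learned Lyapunov function trained under a decrease condition. Here is a toy Python sketch for a 2-D system; the finite-difference vector-field estimate, network shape, and penalty weights are simplifications (the paper uses high-accuracy Koopman generator learning and SMT verification, neither of which is reproduced here):

    import numpy as np
    import torch
    import torch.nn as nn

    def f_true(x):                      # damped oscillator, used only to make data
        return np.stack([x[:, 1], -x[:, 0] - x[:, 1]], axis=1)

    dt = 0.01
    X = np.random.default_rng(0).uniform(-2, 2, (500, 2))
    Xn = X + dt * f_true(X)             # one Euler step stands in for sampling

    # Crude vector-field estimate via finite differences (a stand-in for
    # the high-accuracy Koopman generator learning used in the paper).
    f_hat = (Xn - X) / dt

    # Train a positive-definite Lyapunov candidate V with a decrease penalty.
    net = nn.Sequential(nn.Linear(2, 32), nn.Tanh(), nn.Linear(32, 1))
    opt = torch.optim.Adam(net.parameters(), lr=1e-2)
    x = torch.tensor(X, dtype=torch.float32, requires_grad=True)
    fx = torch.tensor(f_hat, dtype=torch.float32)
    for _ in range(300):
        # V(0) = 0 by construction; the quadratic term keeps V positive elsewhere.
        V = (net(x) - net(torch.zeros(1, 2))).pow(2).sum(1) + 0.1 * x.pow(2).sum(1)
        gradV = torch.autograd.grad(V.sum(), x, create_graph=True)[0]
        Vdot = (gradV * fx).sum(1)
        loss = torch.relu(Vdot + 0.1).mean()   # enforce Vdot <= -0.1 on samples
        opt.zero_grad(); loss.backward(); opt.step()
    print(float(loss))

A sample-based penalty like this only suggests a certificate; the abstract's point is that the candidate must then be formally checked with an SMT solver.
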
arXiv:2411.18671 (https://arxiv.org/abs/2411.18671) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV)
Title: TAPTRv3: Spatial and Temporal Context Foster Robust Tracking of Any Point in Long Video
Authors: Jinyuan Qu, Hongyang Li, Shilong Liu, Tianhe Ren, Zhaoyang Zeng, Lei Zhang
Abstract: In this paper, we present TAPTRv3, which builds upon TAPTRv2 to improve point tracking robustness in long videos. TAPTRv2 is a simple DETR-like framework that can accurately track any point in real-world videos without requiring a cost volume. TAPTRv3 improves TAPTRv2 by addressing its shortcomings in querying high-quality features from long videos, where the target tracking points typically undergo increasing variation over time. In TAPTRv3, we propose to utilize both spatial and temporal context to achieve better feature querying along the spatial and temporal dimensions for more robust tracking in long videos. For better spatial feature querying, we present Context-aware Cross-Attention (CCA), which leverages surrounding spatial context to enhance the quality of attention scores when querying image features. For better temporal feature querying, we introduce Visibility-aware Long-Temporal Attention (VLTA), which attends to all past frames while considering their corresponding visibilities; this effectively addresses the feature drifting problem in TAPTRv2 caused by its RNN-like long-temporal modeling. TAPTRv3 surpasses TAPTRv2 by a large margin on most of the challenging datasets and obtains state-of-the-art performance. Even when compared with methods trained on large-scale extra internal data, TAPTRv3 remains competitive.
Submitted 27 November, 2024; originally announced November 2024.

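Visibility-aware temporal attention can be sketched as ordinary attention over past-frame features with a log-visibility bias, so occluded frames contribute almost nothing. This is a minimal reading of VLTA assumed from the abstract, not the released implementation:

    import torch

    def visibility_aware_attention(q, K, V, vis, eps=1e-6):
        # q: (d,) current query; K, V: (T, d) past-frame keys/values;
        # vis: (T,) per-frame visibility scores in [0, 1].
        scores = K @ q / K.shape[-1] ** 0.5 + torch.log(vis + eps)
        w = torch.softmax(scores, dim=0)    # invisible frames get ~zero weight
        return w @ V

    T, d = 12, 64
    out = visibility_aware_attention(
        torch.randn(d), torch.randn(T, d), torch.randn(T, d), torch.rand(T))
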
arXiv:2411.18533 (https://arxiv.org/abs/2411.18533) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV)
Title: Utilizing the Mean Teacher with Supcontrast Loss for Wafer Pattern Recognition
Authors: Qiyu Wei, Xun Xu, Zeng Zeng, Xulei Yang
Abstract: The patterns on wafer maps play a crucial role in helping engineers identify the causes of production issues during semiconductor manufacturing. To reduce costs and improve accuracy, automation technology is essential, and recent developments in deep learning have led to impressive results in wafer map pattern recognition. In this context, inspired by the effectiveness of semi-supervised and contrastive learning methods, we introduce an approach that integrates the Mean Teacher framework with a supervised contrastive learning loss for enhanced wafer map pattern recognition. Our methodology not only addresses the nuances of wafer patterns but also tackles challenges arising from limited labeled data. To further refine the process, we address data imbalance in the wafer dataset by employing SMOTE and under-sampling techniques. We conduct a comprehensive analysis of our proposed method and demonstrate its effectiveness through experiments on the real-world WM811K dataset obtained from semiconductor manufacturers. Compared to the baseline method, our method achieves improvements of 5.46%, 6.68%, 5.42%, and 4.53% in Accuracy, Precision, Recall, and F1 score, respectively.
Submitted 27 November, 2024; originally announced November 2024.
Comments: 5 pages, 1 figure

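The two ingredients combine naturally: an EMA teacher provides consistency targets on unlabeled wafers while the supervised contrastive loss shapes the labeled embedding space. A compact Python sketch of the generic components follows; the momentum and temperature values are placeholders, not the paper's settings:

    import torch
    import torch.nn.functional as F

    @torch.no_grad()
    def ema_update(teacher, student, m=0.99):
        # Teacher weights are an exponential moving average of the student's.
        for pt, ps in zip(teacher.parameters(), student.parameters()):
            pt.mul_(m).add_(ps, alpha=1 - m)

    def supcon_loss(z, labels, tau=0.1):
        # Supervised contrastive loss over L2-normalized embeddings z: (N, d).
        z = F.normalize(z, dim=1)
        sim = z @ z.T / tau
        mask_self = torch.eye(len(z), dtype=torch.bool)
        sim = sim.masked_fill(mask_self, -1e9)        # exclude self-pairs
        pos = (labels[:, None] == labels[None, :]) & ~mask_self
        log_prob = sim - sim.logsumexp(dim=1, keepdim=True)
        return -(log_prob * pos).sum(1).div(pos.sum(1).clamp(min=1)).mean()

    z = torch.randn(32, 128, requires_grad=True)
    loss = supcon_loss(z, torch.randint(0, 9, (32,)))
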
arXiv:2411.18363 (https://arxiv.org/abs/2411.18363) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV)
Title: ChatRex: Taming Multimodal LLM for Joint Perception and Understanding
Authors: Qing Jiang, Gen Luo, Yuqin Yang, Yuda Xiong, Yihao Chen, Zhaoyang Zeng, Tianhe Ren, Lei Zhang
Abstract: Perception and understanding are two pillars of computer vision. While multimodal large language models (MLLMs) have demonstrated remarkable visual understanding capabilities, they arguably lack accurate perception abilities; e.g., the state-of-the-art model Qwen2-VL achieves only a 43.9 recall rate on the COCO dataset, limiting many tasks that require the combination of perception and understanding. In this work, we aim to bridge this perception gap from both the model design and the data development perspectives. We first introduce ChatRex, an MLLM with a decoupled perception design. Instead of having the LLM directly predict box coordinates, we feed the output boxes from a universal proposal network into the LLM, allowing it to output the corresponding box indices to represent its detection results; this turns the regression task into a retrieval-based task that the LLM handles more proficiently. From the data perspective, we build a fully automated data engine and construct the Rexverse-2M dataset, which possesses multiple granularities to support the joint training of perception and understanding. After standard two-stage training, ChatRex demonstrates strong perception capabilities while preserving multimodal understanding performance. The combination of these two capabilities unlocks many attractive applications, demonstrating the complementary roles of perception and understanding in MLLMs. Code is available at https://github.com/IDEA-Research/ChatRex.
Submitted 2 December, 2024; v1 submitted 27 November, 2024; originally announced November 2024.
Comments: 35 pages, 19 figures

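The decoupled design reduces detection to index selection: proposals are enumerated in the prompt and the model answers with indices rather than coordinates. The following schematic shows that interface; the prompt format and the `llm` stub are invented for illustration, and ChatRex's real tokenization differs:

    # Proposal boxes from a (hypothetical) universal proposal network.
    proposals = [(12, 40, 88, 120), (300, 52, 420, 200), (5, 5, 60, 48)]

    def build_prompt(question, boxes):
        lines = [f"<obj{i}> box: {b}" for i, b in enumerate(boxes)]
        return "\n".join(lines) + f"\nQ: {question}\nA (object indices):"

    def llm(prompt: str) -> str:
        return "0, 2"        # stub: a real MLLM would generate this answer

    def detect(question, boxes):
        # Retrieval instead of regression: parse indices, look up boxes.
        answer = llm(build_prompt(question, boxes))
        return [boxes[int(i)] for i in answer.split(",")]

    print(detect("find all dogs", proposals))

Selecting an index is a classification over a short vocabulary, which language models handle far more reliably than emitting calibrated continuous coordinates.
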
arXiv:2411.16750 (https://arxiv.org/abs/2411.16750) [pdf, other]
Subjects: Computer Vision and Pattern Recognition (cs.CV); Computation and Language (cs.CL); Machine Learning (cs.LG); Multimedia (cs.MM)
Title: PriorDiffusion: Leverage Language Prior in Diffusion Models for Monocular Depth Estimation
Authors: Ziyao Zeng, Jingcheng Ni, Daniel Wang, Patrick Rim, Younjoon Chung, Fengyu Yang, Byung-Woo Hong, Alex Wong
Abstract: This paper explores the potential of leveraging language priors learned by text-to-image diffusion models to address ambiguity and visual nuisances in monocular depth estimation. In particular, traditional monocular depth estimation suffers from inherent ambiguity due to the absence of stereo or multi-view depth cues, and from nuisances due to the limited robustness of vision. We argue that the language prior in diffusion models can enhance monocular depth estimation by leveraging the geometric prior aligned with the language description, which is learned during text-to-image pre-training. To generate images that reflect the text properly, the model must comprehend the size and shape of specified objects, their spatial relationships, and the scale of the scene. Thus, we propose PriorDiffusion, which uses a pre-trained text-to-image diffusion model that takes both an image and a text description aligned with the scene to infer affine-invariant depth through a denoising process. We also show that language priors can guide the model's attention to specific regions and help it perceive the 3D scene in alignment with user intent. At the same time, the language prior acts as a constraint that accelerates convergence of the diffusion trajectory, since learning 3D properties from a condensed, low-dimensional language feature is more efficient than learning from a redundant, high-dimensional image feature. By training on HyperSim and Virtual KITTI, we achieve state-of-the-art zero-shot performance and faster convergence than other diffusion-based depth estimators across NYUv2, KITTI, ETH3D, and ScanNet.
Submitted 24 November, 2024; originally announced November 2024.

arXiv:2411.16206 (https://arxiv.org/abs/2411.16206) [pdf, other]
Subjects: Machine Learning (cs.LG); Artificial Intelligence (cs.AI); Neural and Evolutionary Computing (cs.NE)
Title: Batch Bayesian Optimization via Expected Subspace Improvement
Authors: Dawei Zhan, Zhaoxi Zeng, Shuoxiao Wei, Ping Wu
Abstract: Extending Bayesian optimization to batch evaluation enables the designer to make the best use of parallel computing technology. Most current batch approaches use artificial functions to simulate the sequential Bayesian optimization algorithm's behavior when selecting a batch of points for parallel evaluation. However, as the batch size grows, the accumulated error introduced by these artificial functions increases rapidly, which dramatically decreases the optimization efficiency of the algorithm. In this work, we propose a simple and efficient approach to extend Bayesian optimization to batch evaluation. Different from existing batch approaches, the idea of the new approach is to draw a batch of subspaces of the original problem and select one acquisition point from each subspace. To achieve this, we propose the expected subspace improvement criterion, which measures the improvement that a candidate point can achieve within a given subspace. By optimizing these expected subspace improvement functions simultaneously, we obtain a batch of query points for expensive evaluation. Numerical experiments show that our proposed approach achieves near-linear speedup compared with the sequential Bayesian optimization algorithm, and performs very competitively against eight state-of-the-art batch algorithms. This work provides a simple yet efficient approach for batch Bayesian optimization. A Matlab implementation of our approach is available at https://github.com/zhandawei/Expected_Subspace_Improvement_Batch_Bayesian_Optimization.
Submitted 25 November, 2024; originally announced November 2024.

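The core idea (optimize expected improvement inside a random subspace anchored at the incumbent best, once per batch slot) can be sketched with a scikit-learn Gaussian process. Random coordinate subspaces and random-search inner optimization below are simplifications of the paper's method:

    import numpy as np
    from scipy.stats import norm
    from sklearn.gaussian_process import GaussianProcessRegressor

    def f(x):                          # expensive black box (toy stand-in)
        return np.sum((x - 0.3) ** 2, axis=-1)

    rng = np.random.default_rng(0)
    dim, batch = 6, 4
    X = rng.uniform(0, 1, (10, dim)); y = f(X)
    gp = GaussianProcessRegressor(normalize_y=True).fit(X, y)
    x_best, y_best = X[y.argmin()], y.min()

    def expected_improvement(cand):
        mu, sd = gp.predict(cand, return_std=True)
        z = (y_best - mu) / np.maximum(sd, 1e-12)
        return (y_best - mu) * norm.cdf(z) + sd * norm.pdf(z)

    batch_points = []
    for _ in range(batch):
        dims = rng.choice(dim, size=2, replace=False)   # draw a 2-D subspace
        cand = np.tile(x_best, (256, 1))                # anchor at the incumbent
        cand[:, dims] = rng.uniform(0, 1, (256, 2))     # vary only the subspace
        batch_points.append(cand[expected_improvement(cand).argmax()])
    print(np.array(batch_points))      # the batch is then evaluated in parallel

Because each batch slot searches a different low-dimensional slice through the best point, the slots stay diverse without the error-accumulating "fantasy" updates that artificial-function approaches need.
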
arXiv:2411.14790 (https://arxiv.org/abs/2411.14790) [pdf, other]
Subjects: Computation and Language (cs.CL); Artificial Intelligence (cs.AI)
Title: KBAlign: Efficient Self Adaptation on Specific Knowledge Bases
Authors: Zheni Zeng, Yuxuan Chen, Shi Yu, Ruobing Wang, Yukun Yan, Zhenghao Liu, Shuo Wang, Xu Han, Zhiyuan Liu, Maosong Sun
Abstract: Humans can use techniques to quickly acquire knowledge from specific materials in advance, such as creating self-assessment questions, enabling us to accomplish related tasks more efficiently. In contrast, large language models (LLMs) usually rely on retrieval-augmented generation to exploit knowledge materials on the fly, or require external signals such as human preference data and stronger LLM annotations to conduct knowledge adaptation. To unleash the self-learning potential of LLMs, we propose KBAlign, an approach designed for efficient adaptation to downstream tasks involving knowledge bases. Our method uses iterative training with self-annotated data such as Q&A pairs and revision suggestions, enabling the model to grasp the knowledge content efficiently. Experimental results on multiple datasets demonstrate the effectiveness of our approach, significantly boosting model performance in downstream tasks that require specific knowledge, at a low cost. Notably, our approach achieves over 90% of the performance improvement obtainable with GPT-4-turbo annotation while relying entirely on self-supervision. We release our experimental data, models, and process analyses to the community for further exploration (https://github.com/thunlp/KBAlign).
Submitted 13 December, 2024; v1 submitted 22 November, 2024; originally announced November 2024.
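
The self-annotation loop is easy to state in skeleton form: the model drafts Q&A pairs from knowledge-base chunks, is tuned on them, and repeats. The `draft_qa` and `finetune` stubs below are hypothetical placeholders, since the sketch needs no specific LLM stack:

    kb_chunks = ["chunk 1 of the knowledge base ...", "chunk 2 ..."]

    def draft_qa(model, chunk):
        # Hypothetical: prompt the current model to write Q&A pairs
        # (and revision suggestions) grounded in this chunk.
        return [("What does this chunk state?", chunk[:40])]

    def finetune(model, examples):
        # Hypothetical: one round of supervised tuning on the self-annotations.
        return model

    model = "base-llm"
    for _ in range(3):                   # iterative self-annotation rounds
        data = [qa for c in kb_chunks for qa in draft_qa(model, c)]
        model = finetune(model, data)    # the model improves on its own labels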
