Search | arXiv e-print repository
Showing 1–50 of 863 results for author: Cheng, X

Searching in archive cs; results sorted by announcement date (newest first), 50 per page. (Search v0.5.6, released 2020-02-24.)
1. arXiv:2502.13719 [pdf, other] · cs.IR (Information Retrieval), cs.AI (Artificial Intelligence)
   TrustRAG: An Information Assistant with Retrieval Augmented Generation
   Authors: Yixing Fan, Qiang Yan, Wenshan Wang, Jiafeng Guo, Ruqing Zhang, Xueqi Cheng
   Abstract: Retrieval-augmented generation (RAG) has emerged as a crucial technique for enhancing large models with real-time and domain-specific knowledge. While numerous improvements and open-source tools have been proposed to refine the RAG framework for accuracy, relatively little attention has been given to improving the trustworthiness of generated results. To address this gap, we introduce TrustRAG, a novel framework that enhances RAG from three perspectives: indexing, retrieval, and generation. Specifically, in the indexing stage, we propose a semantic-enhanced chunking strategy that incorporates hierarchical indexing to supplement each chunk with contextual information, ensuring semantic completeness. In the retrieval stage, we introduce a utility-based filtering mechanism to identify high-quality information, supporting answer generation while reducing input length. In the generation stage, we propose fine-grained citation enhancement, which detects opinion-bearing sentences in responses and infers citation relationships at the sentence level, thereby improving citation accuracy. We open-source the TrustRAG framework and provide a demonstration studio designed for excerpt-based question answering tasks (https://huggingface.co/spaces/golaxy/TrustRAG). Based on these, we aim to help researchers (1) systematically enhance the trustworthiness of RAG systems and (2) develop their own RAG systems with more reliable outputs.
   Submitted 19 February, 2025; originally announced February 2025.
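The retrieval-stage idea above is easy to picture: score each retrieved chunk for utility and keep only high-scoring chunks within a token budget. A minimal sketch follows; the `utility` scorer and all names are hypothetical illustrations, not the released TrustRAG code (which lives at the demo link in the abstract).

```python
def filter_chunks(chunks, utility, budget=2048, threshold=0.5):
    """Keep high-utility retrieved chunks within a token budget.

    chunks: list of (text, n_tokens) pairs from the retriever.
    utility: callable mapping text -> score in [0, 1] (e.g., an LLM judge).
    """
    scored = sorted(((utility(text), text, n) for text, n in chunks), reverse=True)
    kept, used = [], 0
    for score, text, n in scored:
        if score < threshold:
            break  # sorted descending, so everything after is below threshold too
        if used + n <= budget:
            kept.append(text)
            used += n
    return kept
```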
2. arXiv:2502.13464 [pdf, other] · cs.CL (Computation and Language), cs.AI (Artificial Intelligence)
   Estimating Commonsense Plausibility through Semantic Shifts
   Authors: Wanqing Cui, Keping Bi, Jiafeng Guo, Xueqi Cheng
   Abstract: Commonsense plausibility estimation is critical for evaluating language models (LMs), yet existing generative approaches, which rely on likelihoods or verbalized judgments, struggle with fine-grained discrimination. In this paper, we propose ComPaSS, a novel discriminative framework that quantifies commonsense plausibility by measuring semantic shifts when augmenting sentences with commonsense-related information. Plausible augmentations induce minimal shifts in semantics, while implausible ones result in substantial deviations. Evaluations on two types of fine-grained commonsense plausibility estimation tasks across different backbones, including LLMs and vision-language models (VLMs), show that ComPaSS consistently outperforms baselines, demonstrating the advantage of discriminative approaches over generative methods in fine-grained commonsense plausibility evaluation. Experiments also show that (1) VLMs yield superior performance to LMs on vision-grounded commonsense tasks when integrated with ComPaSS, and (2) contrastive pre-training sharpens backbone models' ability to capture semantic nuances, thereby further enhancing ComPaSS.
   Submitted 19 February, 2025; originally announced February 2025.
3. arXiv:2502.13394 [pdf, other] · cs.LG (Machine Learning), math.ST (Statistics Theory), stat.ML (Machine Learning)
   Flow-based generative models as iterative algorithms in probability space
   Authors: Yao Xie, Xiuyuan Cheng
   Abstract: Generative AI (GenAI) has revolutionized data-driven modeling by enabling the synthesis of high-dimensional data across various applications, including image generation, language modeling, biomedical signal processing, and anomaly detection. Flow-based generative models provide a powerful framework for capturing complex probability distributions, offering exact likelihood estimation, efficient sampling, and deterministic transformations between distributions. These models leverage invertible mappings governed by Ordinary Differential Equations (ODEs), enabling precise density estimation and likelihood evaluation. This tutorial presents an intuitive mathematical framework for flow-based generative models, formulating them as neural network-based representations of continuous probability densities. We explore key theoretical principles, including the Wasserstein metric, gradient flows, and density evolution governed by ODEs, to establish convergence guarantees and bridge empirical advancements with theoretical insights. By providing a rigorous yet accessible treatment, we aim to equip researchers and practitioners with the necessary tools to effectively apply flow-based generative models in signal processing and machine learning.
   Submitted 18 February, 2025; originally announced February 2025.
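For reference, the ODE-governed density evolution this tutorial builds on is the standard continuous-flow identity: a sample moves along a learned velocity field while its log-density changes by the negative divergence of that field, so exact likelihoods follow by integrating the divergence along the trajectory.

```latex
\frac{\mathrm{d}x(t)}{\mathrm{d}t} = v_\theta\bigl(x(t), t\bigr),
\qquad
\frac{\mathrm{d}\,\log p_t\bigl(x(t)\bigr)}{\mathrm{d}t}
  = -\,\nabla \cdot v_\theta\bigl(x(t), t\bigr)
```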
4. arXiv:2502.13059 [pdf, other] · cs.CL (Computation and Language)
   SimpleVQA: Multimodal Factuality Evaluation for Multimodal Large Language Models
   Authors: Xianfu Cheng, Wei Zhang, Shiwei Zhang, Jian Yang, Xiangyuan Guan, Xianjie Wu, Xiang Li, Ge Zhang, Jiaheng Liu, Yuying Mai, Yutao Zeng, Zhoufutu Wen, Ke Jin, Baorui Wang, Weixiao Zhou, Yunhong Lu, Tongliang Li, Wenhao Huang, Zhoujun Li
   Abstract: The increasing application of multi-modal large language models (MLLMs) across various sectors has spotlighted the importance of their output reliability and accuracy, particularly their ability to produce content grounded in factual information (e.g., common and domain-specific knowledge). In this work, we introduce SimpleVQA, the first comprehensive multi-modal benchmark to evaluate the factuality of MLLMs in answering natural language short questions. SimpleVQA is characterized by six key features: it covers multiple tasks and multiple scenarios, ensures high-quality and challenging queries, maintains static and timeless reference answers, and is straightforward to evaluate. Our approach involves categorizing visual question-answering items into 9 different tasks around objective events or common knowledge and situating these within 9 topics. Rigorous quality control processes are implemented to guarantee high-quality, concise, and clear answers, facilitating evaluation with minimal variance via an LLM-as-a-judge scoring system. Using SimpleVQA, we perform a comprehensive assessment of 18 leading MLLMs and 8 text-only LLMs, delving into their image comprehension and text generation abilities by identifying and analyzing error cases.
   Submitted 18 February, 2025; originally announced February 2025.
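The LLM-as-a-judge scoring mentioned above reduces to a yes/no verdict over (question, reference, prediction) triples. A hypothetical sketch follows; the benchmark's actual prompt and rubric are not given in the abstract, and `llm` is any prompt-to-text callable.

```python
def judge_correct(question: str, reference: str, prediction: str, llm) -> bool:
    """Ask a judge model whether a prediction matches the static reference answer."""
    prompt = (
        "You are grading a short factual answer.\n"
        f"Question: {question}\n"
        f"Reference answer: {reference}\n"
        f"Model answer: {prediction}\n"
        "Does the model answer convey the same fact as the reference? "
        "Reply with exactly YES or NO."
    )
    return llm(prompt).strip().upper().startswith("YES")
```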
5. arXiv:2502.11677 [pdf, other] · cs.CL (Computation and Language)
   Towards Fully Exploiting LLM Internal States to Enhance Knowledge Boundary Perception
   Authors: Shiyu Ni, Keping Bi, Jiafeng Guo, Lulu Yu, Baolong Bi, Xueqi Cheng
   Abstract: Large language models (LLMs) exhibit impressive performance across diverse tasks but often struggle to accurately gauge their knowledge boundaries, leading to confident yet incorrect responses. This paper explores leveraging LLMs' internal states to enhance their perception of knowledge boundaries from efficiency and risk perspectives. We investigate whether LLMs can estimate their confidence using internal states before response generation, potentially saving computational resources. Our experiments on datasets like Natural Questions, HotpotQA, and MMLU reveal that LLMs demonstrate significant pre-generation perception, which is further refined post-generation, with perception gaps remaining stable across varying conditions. To mitigate risks in critical domains, we introduce Consistency-based Confidence Calibration ($C^3$), which assesses confidence consistency through question reformulation. $C^3$ significantly improves LLMs' ability to recognize their knowledge gaps, enhancing the unknown perception rate by 5.6% on NQ and 4.9% on HotpotQA. Our findings suggest that pre-generation confidence estimation can optimize efficiency, while $C^3$ effectively controls output risks, advancing the reliability of LLMs in practical applications.
   Submitted 17 February, 2025; originally announced February 2025.
6. arXiv:2502.11550 [pdf, other] · cs.CR (Cryptography and Security)
   Trinity: A Scalable and Forward-Secure DSSE for Spatio-Temporal Range Query
   Authors: Zhijun Li, Kuizhi Liu, Minghui Xu, Xiangyu Wang, Yinbin Miao, Jianfeng Ma, Xiuzhen Cheng
   Abstract: Cloud-based outsourced location-based services have profound impacts on various aspects of people's lives but bring security concerns. Existing spatio-temporal data secure retrieval schemes have significant shortcomings regarding dynamic updates, either compromising privacy through leakage during updates (forward insecurity) or incurring excessively high update costs that hinder practical application. Under these circumstances, we first propose a basic filter-based spatio-temporal range query scheme, Trinity-I, that supports low-cost dynamic updates and automatic expansion. Furthermore, to improve security and reduce storage cost and false positives, we propose a forward-secure and verifiable scheme, Trinity-II, that simultaneously minimizes storage overhead. A formal security analysis proves that Trinity-I and Trinity-II are Indistinguishable under Selective Chosen-Plaintext Attack (IND-SCPA). Finally, extensive experiments demonstrate that our design Trinity-II significantly reduces storage requirements by 80%, enables data retrieval at the 1-million-record level in just 0.01 seconds, and achieves 10x the update efficiency of the state of the art.
   Submitted 17 February, 2025; originally announced February 2025.
   Comments: 14 pages, 6 figures
7. arXiv:2502.11414 [pdf, other] · cs.IR (Information Retrieval) · doi:10.1145/3701716.3715458
   Unbiased Learning to Rank with Query-Level Click Propensity Estimation: Beyond Pointwise Observation and Relevance
   Authors: Lulu Yu, Keping Bi, Jiafeng Guo, Shihao Liu, Dawei Yin, Xueqi Cheng
   Abstract: Most existing unbiased learning-to-rank (ULTR) approaches are based on the user examination hypothesis, which assumes that users will click a result only if it is both relevant and observed (typically modeled by position). However, in real-world scenarios, users often click only one or two results after examining multiple relevant options, due to limited patience or because their information needs have already been satisfied. Motivated by this, we propose a query-level click propensity model to capture the probability that users will click on different result lists, allowing for non-zero probabilities that users may not click on an observed relevant result. We hypothesize that this propensity increases when more potentially relevant results are present, and refer to this user behavior as relevance saturation bias. Our method introduces a Dual Inverse Propensity Weighting (DualIPW) mechanism, combining query-level and position-level IPW, to address both relevance saturation and position bias. Through theoretical derivation, we prove that DualIPW can learn an unbiased ranking model. Experiments on the real-world Baidu-ULTR dataset demonstrate that our approach significantly outperforms state-of-the-art ULTR baselines. The code and dataset information can be found at https://github.com/Trustworthy-Information-Access/DualIPW.
   Submitted 18 February, 2025; v1 submitted 16 February, 2025; originally announced February 2025.
   Comments: 5 pages, 3 figures, accepted by The ACM Web Conference (WWW) 2025 Short Paper Track
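Classical ULTR reweights each click by the inverse of its position-level observation propensity; the dual mechanism described above adds a query-level factor. A schematic pointwise version follows, with the paper's actual estimator and training objective likely differing.

```python
import torch
import torch.nn.functional as F

def dual_ipw_loss(scores, clicks, pos_propensity, query_propensity):
    """scores: model outputs for one result list, shape [n].
    clicks: observed clicks as a float tensor in {0., 1.}, shape [n].
    pos_propensity: per-position observation probabilities, shape [n].
    query_propensity: scalar click propensity for the whole query."""
    weights = clicks / (pos_propensity * query_propensity)  # inverse-propensity weights
    # Clicks act as debiased positive labels; the dual weighting corrects
    # both position bias and relevance saturation bias.
    return F.binary_cross_entropy_with_logits(scores, clicks, weight=weights,
                                              reduction="sum")
```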
8. arXiv:2502.11401 [pdf, other] · cs.CL (Computation and Language)
   Following the Autoregressive Nature of LLM Embeddings via Compression and Alignment
   Authors: Jingcheng Deng, Zhongtao Jiang, Liang Pang, Liwei Chen, Kun Xu, Zihao Wei, Huawei Shen, Xueqi Cheng
   Abstract: A new trend uses LLMs as dense text encoders via contrastive learning. However, since LLM embeddings predict the probability distribution of the next token, they are inherently generative and distributive, conflicting with contrastive learning, which requires embeddings to capture full-text semantics and align via cosine similarity. This discrepancy hinders the full utilization of LLMs' pre-training capabilities, resulting in inefficient learning. In response to this issue, we propose AutoRegEmbed, a new contrastive learning method built on embedding conditional probability distributions, which integrates two core tasks: information compression and conditional distribution alignment. The information compression task encodes text into the embedding space, ensuring that the embedding vectors capture global semantics. The conditional distribution alignment task focuses on aligning text embeddings with positive-sample embeddings by leveraging the conditional distribution of embeddings, while simultaneously reducing the likelihood of generating negative samples from text embeddings, thereby achieving embedding alignment and uniformity. Experimental results demonstrate that our method significantly outperforms traditional contrastive learning approaches and achieves performance comparable to state-of-the-art models when using the same amount of data.
   Submitted 16 February, 2025; originally announced February 2025.
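For contrast, the "traditional contrastive learning" that the abstract says clashes with autoregressive embeddings is typically an InfoNCE objective over cosine similarities, along these lines (a generic baseline sketch, not AutoRegEmbed itself):

```python
import torch
import torch.nn.functional as F

def info_nce(query_emb, pos_emb, neg_embs, temperature=0.05):
    """query_emb, pos_emb: [d]; neg_embs: [k, d].
    Pulls the positive pair together and pushes negatives apart by cosine similarity."""
    q = F.normalize(query_emb, dim=-1)
    candidates = F.normalize(torch.cat([pos_emb.unsqueeze(0), neg_embs]), dim=-1)
    logits = candidates @ q / temperature  # [k + 1]; index 0 is the positive
    return F.cross_entropy(logits.unsqueeze(0), torch.zeros(1, dtype=torch.long))
```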
9. arXiv:2502.11400 [pdf, other] · cs.CL (Computation and Language)
   Revisiting Robust RAG: Do We Still Need Complex Robust Training in the Era of Powerful LLMs?
   Authors: Hanxing Ding, Shuchang Tao, Liang Pang, Zihao Wei, Liwei Chen, Kun Xu, Huawei Shen, Xueqi Cheng
   Abstract: Retrieval-augmented generation (RAG) systems often suffer from performance degradation when encountering noisy or irrelevant documents, driving researchers to develop sophisticated training strategies to enhance their robustness against such retrieval noise. However, as large language models (LLMs) continue to advance, the necessity of these complex training methods is increasingly questioned. In this paper, we systematically investigate whether complex robust training strategies remain necessary as model capacity grows. Through comprehensive experiments spanning multiple model architectures and parameter scales, we evaluate various document selection methods and adversarial training techniques across diverse datasets. Our extensive experiments consistently demonstrate that as models become more powerful, the performance gains brought by complex robust training methods drop off dramatically. We delve into the rationale and find that more powerful models inherently exhibit superior confidence calibration, better generalization across datasets (even when trained with randomly selected documents), and optimal attention mechanisms learned with simpler strategies. Our findings suggest that RAG systems can benefit from simpler architectures and training strategies as models become more powerful, enabling more scalable applications with minimal complexity.
   Submitted 16 February, 2025; originally announced February 2025.
10. arXiv:2502.11177 [pdf, other] · cs.CL (Computation and Language)
    The Mirage of Model Editing: Revisiting Evaluation in the Wild
    Authors: Wanli Yang, Fei Sun, Jiajun Tan, Xinyu Ma, Qi Cao, Dawei Yin, Huawei Shen, Xueqi Cheng
    Abstract: Despite near-perfect results in artificial evaluations, the effectiveness of model editing in real-world applications remains unexplored. To bridge this gap, we propose to study model editing in question answering (QA) by establishing a rigorous evaluation practice to assess the effectiveness of editing methods in correcting LLMs' errors. It consists of QAEdit, a new benchmark derived from popular QA datasets, and a standardized evaluation framework. Our single-editing experiments indicate that current editing methods perform substantially worse than previously reported (38.5% vs. ~96%). Through module analysis and controlled experiments, we demonstrate that this performance decline stems from issues in the evaluation practices of prior editing research. One key issue is the inappropriate use of teacher forcing in testing, which prevents error propagation by feeding ground-truth tokens (inaccessible in real-world scenarios) as input. Furthermore, we simulate real-world deployment by sequential editing, revealing that current approaches fail drastically with only 1000 edits. Our analysis provides a fundamental reexamination of both the real-world applicability of existing model editing methods and their evaluation practices, and establishes a rigorous evaluation framework with key insights to advance reliable and practical model editing research.
    Submitted 18 February, 2025; v1 submitted 16 February, 2025; originally announced February 2025.
11. arXiv:2502.07327 [pdf, other] · cs.IR (Information Retrieval), cs.CV (Computer Vision and Pattern Recognition)
    Generative Ghost: Investigating Ranking Bias Hidden in AI-Generated Videos
    Authors: Haowen Gao, Liang Pang, Shicheng Xu, Leigang Qu, Tat-Seng Chua, Huawei Shen, Xueqi Cheng
    Abstract: With the rapid development of AI-generated content (AIGC), the creation of high-quality AI-generated videos has become faster and easier, resulting in the Internet being flooded with all kinds of video content. However, the impact of these videos on the content ecosystem remains largely unexplored. Video information retrieval remains a fundamental approach for accessing video content. Building on the observation that retrieval models often favor AI-generated content in ad-hoc and image retrieval tasks, we investigate whether similar biases emerge in the context of challenging video retrieval, where temporal and visual factors may further influence model behavior. To explore this, we first construct a comprehensive benchmark dataset containing both real and AI-generated videos, along with a set of fair and rigorous metrics to assess bias. This benchmark consists of 13,000 videos generated by two state-of-the-art open-source video generation models. We meticulously design a suite of rigorous metrics to accurately measure this preference, accounting for potential biases arising from the limited frame rate and suboptimal quality of AIGC videos. We then apply three off-the-shelf video retrieval models to perform retrieval tasks on this hybrid dataset. Our findings reveal a clear preference for AI-generated videos in retrieval. Further investigation shows that incorporating AI-generated videos into the training set of retrieval models exacerbates this bias. Unlike the preference observed in image modalities, we find that video retrieval bias arises from both unseen visual and temporal information, making the root causes of video bias a complex interplay of these two factors. To mitigate this bias, we fine-tune the retrieval models using a contrastive learning approach. The results of this study highlight the potential implications of AI-generated videos for retrieval systems.
    Submitted 11 February, 2025; originally announced February 2025.
href="/search/cs?searchtype=author&query=Paik%2C+I">Inyoung Paik</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+J">Joonho Lee</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+G">Geongyu Lee</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yujia Chen</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Wangkai Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+Z">Zhaoyang Li</a>, <a href="/search/cs?searchtype=author&query=Hou%2C+X">Xuege Hou</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+Z">Zeyuan Wu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+S">Shengjin Wang</a>, <a href="/search/cs?searchtype=author&query=Fischer%2C+M">Maximilian Fischer</a> , et al. (22 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.07288v1-abstract-short" style="display: inline;"> Chronic kidney disease (CKD) is a major global health issue, affecting over 10% of the population and causing significant mortality. While kidney biopsy remains the gold standard for CKD diagnosis and treatment, the lack of comprehensive benchmarks for kidney pathology segmentation hinders progress in the field. To address this, we organized the Kidney Pathology Image Segmentation (KPIs) Challenge… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07288v1-abstract-full').style.display = 'inline'; document.getElementById('2502.07288v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.07288v1-abstract-full" style="display: none;"> Chronic kidney disease (CKD) is a major global health issue, affecting over 10% of the population and causing significant mortality. While kidney biopsy remains the gold standard for CKD diagnosis and treatment, the lack of comprehensive benchmarks for kidney pathology segmentation hinders progress in the field. To address this, we organized the Kidney Pathology Image Segmentation (KPIs) Challenge, introducing a dataset that incorporates preclinical rodent models of CKD with over 10,000 annotated glomeruli from 60+ Periodic Acid Schiff (PAS)-stained whole slide images. The challenge includes two tasks, patch-level segmentation and whole slide image segmentation and detection, evaluated using the Dice Similarity Coefficient (DSC) and F1-score. By encouraging innovative segmentation methods that adapt to diverse CKD models and tissue conditions, the KPIs Challenge aims to advance kidney pathology analysis, establish new benchmarks, and enable precise, large-scale quantification for disease research and diagnosis. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.07288v1-abstract-full').style.display = 'none'; document.getElementById('2502.07288v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. 
arXiv:2502.06872 (https://arxiv.org/abs/2502.06872) [pdf, other]
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence)

Towards Trustworthy Retrieval Augmented Generation for Large Language Models: A Survey

Authors: Bo Ni, Zheyuan Liu, Leyao Wang, Yongjia Lei, Yuying Zhao, Xueqi Cheng, Qingkai Zeng, Luna Dong, Yinglong Xia, Krishnaram Kenthapadi, Ryan Rossi, Franck Dernoncourt, Md Mehrab Tanjim, Nesreen Ahmed, Xiaorui Liu, Wenqi Fan, Erik Blasch, Yu Wang, Meng Jiang, Tyler Derr

Abstract: Retrieval-Augmented Generation (RAG) is an advanced technique designed to address the challenges of Artificial Intelligence-Generated Content (AIGC). By integrating context retrieval into content generation, RAG provides reliable and up-to-date external knowledge, reduces hallucinations, and ensures relevant context across a wide range of tasks. However, despite RAG's success and potential, recent studies have shown that the RAG paradigm also introduces new risks, including robustness issues, privacy concerns, adversarial attacks, and accountability issues. Addressing these risks is critical for future applications of RAG systems, as they directly impact their trustworthiness. Although various methods have been developed to improve the trustworthiness of RAG methods, there is a lack of a unified perspective and framework for research on this topic. Thus, in this paper, we aim to address this gap by providing a comprehensive roadmap for developing trustworthy RAG systems. We place our discussion around six key perspectives: reliability, privacy, safety, fairness, explainability, and accountability. For each perspective, we present a general framework and taxonomy, offering a structured approach to understanding the current challenges, evaluating existing solutions, and identifying promising future research directions. To encourage broader adoption and innovation, we also highlight the downstream applications where trustworthy RAG systems have a significant impact.

Submitted 8 February, 2025; originally announced February 2025.
arXiv:2502.06868 (https://arxiv.org/abs/2502.06868) [pdf, other]
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence)

Related Knowledge Perturbation Matters: Rethinking Multiple Pieces of Knowledge Editing in Same-Subject

Authors: Zenghao Duan, Wenbin Duan, Zhiyi Yin, Yinghan Shen, Shaoling Jing, Jie Zhang, Huawei Shen, Xueqi Cheng

Abstract: Knowledge editing has become a promising approach for efficiently and precisely updating knowledge embedded in large language models (LLMs). In this work, we focus on Same-Subject Editing, which involves modifying multiple attributes of a single entity to ensure comprehensive and consistent updates to entity-centric knowledge. Through preliminary observation, we identify a significant challenge: current state-of-the-art editing methods struggle when tasked with editing multiple related knowledge pieces for the same subject. To address the lack of relevant editing data for identical subjects in traditional benchmarks, we introduce the S²RKE (Same-Subject Related Knowledge Editing) benchmark. Our extensive experiments reveal that only mainstream locate-then-edit methods, such as ROME and MEMIT, exhibit "related knowledge perturbation," where subsequent edits interfere with earlier ones. Further analysis reveals that these methods over-rely on subject information, neglecting other critical factors, resulting in reduced editing effectiveness.

Submitted 7 February, 2025; originally announced February 2025.
Comments: Accepted by NAACL 2025
arXiv:2502.05471 (https://arxiv.org/abs/2502.05471) [pdf, other]
Subjects: cs.SD (Sound); eess.AS (Audio and Speech Processing)

Enhancing Expressive Voice Conversion with Discrete Pitch-Conditioned Flow Matching Model

Authors: Jialong Zuo, Shengpeng Ji, Minghui Fang, Ziyue Jiang, Xize Cheng, Qian Yang, Wenrui Liu, Guangyan Zhang, Zehai Tu, Yiwen Guo, Zhou Zhao

Abstract: This paper introduces PFlow-VC, a conditional flow matching voice conversion model that leverages fine-grained discrete pitch tokens and target speaker prompt information for expressive voice conversion (VC). Previous VC works primarily focus on speaker conversion, with further exploration needed in enhancing expressiveness (such as prosody and emotion) for timbre conversion. Unlike previous methods, we adopt a simple and efficient approach to enhance the style expressiveness of voice conversion models. Specifically, we pretrain a self-supervised pitch VQVAE model to discretize speaker-irrelevant pitch information and leverage a masked pitch-conditioned flow matching model for Mel-spectrogram synthesis, which provides in-context pitch modeling capabilities for the speaker conversion model, effectively improving the voice style transfer capacity. Additionally, we improve timbre similarity by combining global timbre embeddings with time-varying timbre tokens. Experiments on the unseen LibriTTS test-clean set and the emotional speech dataset ESD show the superiority of the PFlow-VC model in both timbre conversion and style transfer. Audio samples are available on the demo page https://speechai-demo.github.io/PFlow-VC/.

Submitted 8 February, 2025; originally announced February 2025.
Comments: Accepted by ICASSP 2025
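The masked pitch-conditioned flow matching component can be pictured as a standard conditional flow matching training step. The sketch below assumes a linear interpolation path, a hypothetical velocity network `net`, and an assumed masking ratio; it illustrates the idea rather than the PFlow-VC implementation:

    import torch

    def fm_step(net, mel, pitch_tokens, spk_prompt):
        """One training step. mel: (B, T, 80) target Mel-spectrogram;
        pitch_tokens: (B, T) discrete pitch ids; spk_prompt: (B, D) speaker
        embedding. `net` is a hypothetical velocity-field predictor."""
        b = mel.size(0)
        t = torch.rand(b, 1, 1, device=mel.device)    # flow time in [0, 1]
        x0 = torch.randn_like(mel)                    # Gaussian source sample
        xt = (1 - t) * x0 + t * mel                   # linear probability path
        target_v = mel - x0                           # constant velocity target
        # Random frame mask: masked-out frames contribute zero loss, mimicking
        # in-context (masked) conditioning; the 0.3 ratio is an assumption.
        mask = (torch.rand(b, mel.size(1), 1, device=mel.device) > 0.3).float()
        pred_v = net(xt, t.view(b), pitch_tokens, spk_prompt)
        return (mask * (pred_v - target_v) ** 2).mean()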
arXiv:2502.00433 (https://arxiv.org/abs/2502.00433) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)

CAT Pruning: Cluster-Aware Token Pruning For Text-to-Image Diffusion Models

Authors: Xinle Cheng, Zhuoming Chen, Zhihao Jia

Abstract: Diffusion models have revolutionized generative tasks, especially in the domain of text-to-image synthesis; however, their iterative denoising process demands substantial computational resources. In this paper, we present a novel acceleration strategy that integrates token-level pruning with caching techniques to tackle this computational challenge. By employing noise relative magnitude, we identify significant token changes across denoising iterations. Additionally, we enhance token selection by incorporating spatial clustering and ensuring distributional balance. Our experiments reveal a 50%-60% reduction in computational costs while preserving the performance of the model, thereby markedly increasing the efficiency of diffusion models. The code is available at https://github.com/ada-cheng/CAT-Pruning

Submitted 1 February, 2025; originally announced February 2025.
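The noise-relative-magnitude criterion can be illustrated with a short sketch. The shapes and the 30% token budget below are assumptions, and the paper's additional spatial-clustering and distributional-balance steps are only noted in a comment:

    import torch

    def select_active_tokens(noise_prev, noise_curr, keep_ratio=0.3):
        """noise_*: (N, D) per-token noise predictions from consecutive denoising
        steps. Returns indices of tokens whose prediction changed most; these are
        recomputed, while the rest reuse cached features. CAT-Pruning further
        balances this selection with spatial clustering, omitted here."""
        delta = (noise_curr - noise_prev).norm(dim=-1)   # per-token change
        rel = delta / (noise_curr.norm(dim=-1) + 1e-7)   # relative magnitude
        k = max(1, int(keep_ratio * rel.numel()))
        return rel.topk(k).indices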
arXiv:2502.00348 (https://arxiv.org/abs/2502.00348) [pdf, other]
Subjects: cs.IR (Information Retrieval)

Personalized Denoising Implicit Feedback for Robust Recommender System

Authors: Kaike Zhang, Qi Cao, Yunfan Wu, Fei Sun, Huawei Shen, Xueqi Cheng

Abstract: While implicit feedback is foundational to modern recommender systems, factors such as human error, uncertainty, and ambiguity in user behavior inevitably introduce significant noise into this feedback, adversely affecting the accuracy and robustness of recommendations. To address this issue, existing methods typically aim to reduce the training weight of noisy feedback or discard it entirely, based on the observation that noisy interactions often exhibit higher losses in the overall loss distribution. However, we identify two key issues: (1) there is a significant overlap between normal and noisy interactions in the overall loss distribution, and (2) this overlap becomes even more pronounced when transitioning from pointwise loss functions (e.g., BCE loss) to pairwise loss functions (e.g., BPR loss). This overlap leads traditional methods to misclassify noisy interactions as normal, and vice versa. To tackle these challenges, we further investigate the loss overlap and find that for a given user, there is a clear distinction between normal and noisy interactions in the user's personal loss distribution. Based on this insight, we propose a resampling strategy to Denoise using the user's Personal Loss distribution, named PLD, which reduces the probability of noisy interactions being optimized. Specifically, during each optimization iteration, we create a candidate item pool for each user and resample the items from this pool based on the user's personal loss distribution, prioritizing normal interactions. Additionally, we conduct a theoretical analysis to validate PLD's effectiveness and suggest ways to further enhance its performance. Extensive experiments conducted on three datasets with varying noise ratios demonstrate PLD's efficacy and robustness.

Submitted 1 February, 2025; originally announced February 2025.
Comments: To appear in WWW 2025
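A minimal sketch of the resampling idea follows, assuming a softmax over negated per-item losses as the user's personal loss weighting; the paper's exact estimator may differ:

    import torch

    def resample_from_pool(user_losses, pool_items, n_samples, temperature=1.0):
        """user_losses: (P,) recent training losses over one user's candidate
        pool; pool_items: (P,) item ids. Lower loss (likely normal interaction)
        maps to higher sampling probability, so noisy interactions are
        optimized less often."""
        probs = torch.softmax(-user_losses / temperature, dim=0)
        idx = torch.multinomial(probs, n_samples, replacement=True)
        return pool_items[idx]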
arXiv:2501.18371 (https://arxiv.org/abs/2501.18371) [pdf, other]
Subjects: cs.AR (Hardware Architecture); cs.CR (Cryptography and Security)

FLASH-FHE: A Heterogeneous Architecture for Fully Homomorphic Encryption Acceleration

Authors: Junxue Zhang, Xiaodian Cheng, Gang Cao, Meng Dai, Yijun Sun, Han Tian, Dian Shen, Yong Wang, Kai Chen

Abstract: While many hardware accelerators have recently been proposed to address the inefficiency problem of fully homomorphic encryption (FHE) schemes, none of them is able to deliver optimal performance when facing real-world FHE workloads consisting of a mixture of shallow and deep computations, due primarily to their homogeneous design principle. This paper presents FLASH-FHE, the first FHE accelerator with a heterogeneous architecture for mixed workloads. At its heart, FLASH-FHE designs two types of computation clusters, i.e., bootstrappable and swift, to optimize for deep and shallow workloads respectively in terms of cryptographic parameters and hardware pipelines. We organize one bootstrappable and two swift clusters into one cluster affiliation, and present a scheduling scheme that provides sufficient acceleration for deep FHE workloads by utilizing all the affiliations, while improving parallelism for shallow FHE workloads by assigning one shallow workload per affiliation and dynamically decomposing the bootstrappable cluster into multiple swift pipelines to accelerate the assigned workload. We further show that these two types of clusters can share valuable on-chip memory, improving performance without significant resource consumption. We implement FLASH-FHE in RTL and synthesize it using both 7nm and 14/12nm technology nodes; our experimental results demonstrate that FLASH-FHE achieves an average performance improvement of 1.4x and 11.2x compared to the state-of-the-art FHE accelerators CraterLake and F1 for deep workloads, while delivering up to 8.0x speedup for shallow workloads due to its heterogeneous architecture.

Submitted 30 January, 2025; originally announced January 2025.
arXiv:2501.13312 (https://arxiv.org/abs/2501.13312) [pdf, other]
Subjects: cs.LG (Machine Learning)

Tensor-Var: Variational Data Assimilation in Tensor Product Feature Space

Authors: Yiming Yang, Xiaoyuan Cheng, Daniel Giles, Sibo Cheng, Yi He, Xiao Xue, Boli Chen, Yukun Hu

Abstract: Variational data assimilation estimates the dynamical system states by minimizing a cost function that fits the numerical models with observational data. The widely used method, four-dimensional variational assimilation (4D-Var), has two primary challenges: (1) it is computationally demanding for complex nonlinear systems, and (2) it relies on state-observation mappings, which are often not perfectly known. Deep learning (DL) has been used as a more expressive class of efficient model approximators to address these challenges. However, integrating such models into 4D-Var remains challenging due to their inherent nonlinearities and the lack of theoretical guarantees for consistency in assimilation results. In this paper, we propose Tensor-Var to address these challenges using kernel Conditional Mean Embedding (CME). Tensor-Var improves optimization efficiency by characterizing system dynamics and state-observation mappings as linear operators, leading to a convex cost function in the feature space. Furthermore, our method provides a new perspective for incorporating CME into 4D-Var, offering theoretical guarantees of consistent assimilation results between the original and feature spaces. To improve scalability, we propose a method to learn deep features (DFs) using neural networks within the Tensor-Var framework. Experiments on chaotic systems and global weather prediction with real-time observations show that Tensor-Var outperforms conventional and DL hybrid 4D-Var baselines in accuracy while achieving efficiency comparable to the static 3D-Var method.

Submitted 12 February, 2025; v1 submitted 22 January, 2025; originally announced January 2025.
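Once dynamics and the state-observation map are linear operators in the feature space, the 4D-Var cost becomes a quadratic with a unique minimizer. A small numpy sketch follows, with all operators assumed given (in Tensor-Var they would be learned via CME, and the observation indexing convention here is an assumption):

    import numpy as np

    def feature_space_4dvar(phi_b, A, H, ys, B_inv, R_inv):
        """phi_b: (d,) background state in feature space; A: (d, d) linear
        dynamics; H: (m, d) linear observation operator; ys: list of (m,)
        observations at steps k = 0, 1, ... Minimizes the quadratic cost
        ||phi - phi_b||^2_{B_inv} + sum_k ||y_k - H A^k phi||^2_{R_inv}."""
        lhs, rhs = B_inv.copy(), B_inv @ phi_b
        Ak = np.eye(A.shape[0])
        for y in ys:
            G = H @ Ak                    # maps initial features to observation k
            lhs += G.T @ R_inv @ G
            rhs += G.T @ R_inv @ y
            Ak = A @ Ak
        return np.linalg.solve(lhs, rhs)  # convex problem, closed-form solution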
arXiv:2501.12948 (https://arxiv.org/abs/2501.12948) [pdf, other]
Subjects: cs.CL (Computation and Language); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)

DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning

Authors: DeepSeek-AI, Daya Guo, Dejian Yang, Haowei Zhang, Junxiao Song, Ruoyu Zhang, Runxin Xu, Qihao Zhu, Shirong Ma, Peiyi Wang, Xiao Bi, Xiaokang Zhang, Xingkai Yu, Yu Wu, Z. F. Wu, Zhibin Gou, Zhihong Shao, Zhuoshu Li, Ziyi Gao, Aixin Liu, Bing Xue, Bingxuan Wang, Bochao Wu, Bei Feng, Chengda Lu, et al. (175 additional authors not shown)

Abstract: We introduce our first-generation reasoning models, DeepSeek-R1-Zero and DeepSeek-R1. DeepSeek-R1-Zero, a model trained via large-scale reinforcement learning (RL) without supervised fine-tuning (SFT) as a preliminary step, demonstrates remarkable reasoning capabilities. Through RL, DeepSeek-R1-Zero naturally emerges with numerous powerful and intriguing reasoning behaviors. However, it encounters challenges such as poor readability and language mixing. To address these issues and further enhance reasoning performance, we introduce DeepSeek-R1, which incorporates multi-stage training and cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1-1217 on reasoning tasks. To support the research community, we open-source DeepSeek-R1-Zero, DeepSeek-R1, and six dense models (1.5B, 7B, 8B, 14B, 32B, 70B) distilled from DeepSeek-R1 based on Qwen and Llama.

Submitted 22 January, 2025; originally announced January 2025.
arXiv:2501.11034 (https://arxiv.org/abs/2501.11034) [pdf, other]
Subjects: cs.IR (Information Retrieval)

Generative Retrieval for Book search

Authors: Yubao Tang, Ruqing Zhang, Jiafeng Guo, Maarten de Rijke, Shihao Liu, Shuaiqing Wang, Dawei Yin, Xueqi Cheng

Abstract: In book search, relevant book information should be returned in response to a query. Books contain complex, multi-faceted information such as metadata, outlines, and main text, where the outline provides hierarchical information between chapters and sections. Generative retrieval (GR) is a new retrieval paradigm that consolidates corpus information into a single model to generate identifiers of documents that are relevant to a given query. How can GR be applied to book search? Directly applying GR to book search is a challenge due to the unique characteristics of book search: the model needs to retain the complex, multi-faceted information of the book, which increases the demand for labeled data. Splitting book information and treating it as a collection of separate segments for learning might result in a loss of hierarchical information. We propose an effective Generative retrieval framework for Book Search (GBS) that features two main components: data augmentation and outline-oriented book encoding. For data augmentation, GBS constructs multiple query-book pairs for training; it constructs multiple book identifiers based on the outline and various forms of book contents, and simulates real book retrieval scenarios with varied pseudo-queries. This includes coverage-promoting book identifier augmentation, allowing the model to learn to index effectively, and diversity-enhanced query augmentation, allowing the model to learn to retrieve effectively. Outline-oriented book encoding improves length extrapolation through bi-level positional encoding and retentive attention mechanisms to maintain context over long sequences. Experiments on a proprietary Baidu dataset demonstrate that GBS outperforms strong baselines, achieving a 9.8% improvement in terms of MRR@20 over the state-of-the-art RIPOR method...

Submitted 19 January, 2025; originally announced January 2025.
Comments: Accepted at KDD ADS 2025
arXiv:2501.10983 (https://arxiv.org/abs/2501.10983) [pdf, other]
Subjects: cs.CR (Cryptography and Security); cs.AR (Hardware Architecture)

CIBPU: A Conflict-Invisible Secure Branch Prediction Unit

Authors: Zhe Zhou, Fei Tong, Hongyu Wang, Xiaoyu Cheng, Fang Jiang, Zhikun Zhang, Yuxing Mao

Abstract: Previous schemes for designing a secure branch prediction unit (SBPU) based on physical isolation can only offer limited security and significantly affect the BPU's prediction capability, leading to prominent performance degradation. Moreover, encryption-based SBPU schemes based on periodic key re-randomization have the risk of being compromised by advanced attack algorithms, and the performance overhead is also considerable. To this end, this paper proposes a conflict-invisible SBPU (CIBPU). CIBPU employs redundant storage design, load-aware indexing, and replacement design, as well as an encryption mechanism without requiring periodic key updates, to prevent attackers' perception of branch conflicts. We provide a thorough security analysis, which shows that CIBPU achieves strong security throughout the BPU's lifecycle. We implement CIBPU in a RISC-V core model in gem5. The experimental results show that CIBPU causes an average performance overhead of only 1.12%-2.20% with acceptable hardware storage overhead, which is the lowest among the state-of-the-art SBPU schemes. CIBPU has also been implemented in the open-source RISC-V core SonicBOOM, which is then burned onto an FPGA board. The evaluation based on the board shows an average performance degradation of 2.01%, which is approximately consistent with the result obtained in gem5.

Submitted 19 January, 2025; originally announced January 2025.
Comments: 12 pages, 10 figures
arXiv:2501.09921 (https://arxiv.org/abs/2501.09921) [pdf, other]
Subjects: cs.CV (Computer Vision and Pattern Recognition)

TalkingEyes: Pluralistic Speech-Driven 3D Eye Gaze Animation

Authors: Yixiang Zhuang, Chunshan Ma, Yao Cheng, Xuan Cheng, Jing Liao, Juncong Lin

Abstract: Although significant progress has been made in the field of speech-driven 3D facial animation recently, the speech-driven animation of an indispensable facial component, eye gaze, has been overlooked by recent research. This is primarily due to the weak correlation between speech and eye gaze, as well as the scarcity of audio-gaze data, making it very challenging to generate 3D eye gaze motion from speech alone. In this paper, we propose a novel data-driven method which can generate diverse 3D eye gaze motions in harmony with the speech. To achieve this, we first construct an audio-gaze dataset that contains about 14 hours of audio-mesh sequences featuring high-quality eye gaze motion, head motion and facial motion simultaneously. The motion data is acquired by performing lightweight eye gaze fitting and face reconstruction on videos from existing audio-visual datasets. We then tailor a novel speech-to-motion translation framework in which the head motions and eye gaze motions are jointly generated from speech but are modeled in two separate latent spaces. This design stems from the physiological knowledge that the rotation range of the eyeballs is smaller than that of the head. Through mapping the speech embedding into the two latent spaces, the difficulty in modeling the weak correlation between speech and non-verbal motion is thus attenuated. Finally, our TalkingEyes, integrated with a speech-driven 3D facial motion generator, can synthesize eye gaze motion, eye blinks, head motion and facial motion collectively from speech. Extensive quantitative and qualitative evaluations demonstrate the superiority of the proposed method in generating diverse and natural 3D eye gaze motions from speech. The project page of this paper is: https://lkjkjoiuiu.github.io/TalkingEyes_Home/

Submitted 16 January, 2025; originally announced January 2025.
arXiv:2501.06071 (https://arxiv.org/abs/2501.06071) [pdf, other]
Subjects: cs.CR (Cryptography and Security)

Unveiling Malware Patterns: A Self-analysis Perspective

Authors: Fangtian Zhong, Qin Hu, Yili Jiang, Jiaqi Huang, Xiuzhen Cheng

Abstract: The widespread usage of Microsoft Windows has unfortunately led to a surge in malware, posing a serious threat to the security and privacy of millions of users. In response, the research community has mobilized, with numerous efforts dedicated to strengthening defenses against these threats. The primary goal of these techniques is to detect malicious software early, preventing attacks before any damage occurs. However, many of these methods either claim that packing has minimal impact on malware detection or fail to address the reliability of their approaches when applied to packed samples. Consequently, they are not capable of assisting victims in handling packed programs or recovering from the damage caused by untimely malware detection. In light of these challenges, we propose VisUnpack, a static analysis-based data visualization framework for bolstering attack prevention while aiding recovery post-attack by unveiling malware patterns and offering more detailed information, including both malware class and family. Our method includes unpacking packed malware programs, calculating local similarity descriptors based on basic blocks, enhancing correlations between descriptors, and refining them by minimizing noise to obtain self-analysis descriptors. Moreover, we employ machine learning to learn the correlations of self-analysis descriptors through architectural learning for final classification. Our comprehensive evaluation of VisUnpack, based on a freshly gathered dataset with over 27,106 samples, confirms its capability to accurately classify malware programs with a precision of 99.7%. Additionally, VisUnpack reveals that most antivirus products on VirusTotal cannot handle packed samples properly or provide precise malware classification information. We also achieve over 97% space savings compared to existing data-visualization-based methods.

Submitted 10 January, 2025; originally announced January 2025.
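The local-similarity-descriptor step can be pictured as building a pairwise similarity matrix over basic blocks and rendering it as an image for a downstream classifier. The byte-bigram Jaccard similarity below is an illustrative stand-in, not VisUnpack's actual descriptor:

    import numpy as np

    def similarity_image(blocks: list[bytes], size: int = 256) -> np.ndarray:
        """blocks: byte strings of disassembled basic blocks (assumed non-empty).
        Returns a (size, size) grayscale image of pairwise block similarities."""
        def sim(a: bytes, b: bytes) -> float:
            # Byte-bigram Jaccard similarity; an assumed, simple local descriptor.
            sa, sb = set(zip(a, a[1:])), set(zip(b, b[1:]))
            return len(sa & sb) / (len(sa | sb) or 1)
        n = len(blocks)
        m = np.zeros((n, n), dtype=np.float32)
        for i in range(n):
            for j in range(i, n):
                m[i, j] = m[j, i] = sim(blocks[i], blocks[j])
        # Nearest-neighbor resize to a fixed input size for the image classifier.
        idx = np.linspace(0, n - 1, size).astype(int)
        return m[np.ix_(idx, idx)]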
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02807">arXiv:2501.02807</a> <span> [<a href="https://arxiv.org/pdf/2501.02807">pdf</a>, <a href="https://arxiv.org/format/2501.02807">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> AE-NeRF: Augmenting Event-Based Neural Radiance Fields for Non-ideal Conditions and Larger Scene </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Feng%2C+C">Chaoran Feng</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+W">Wangbo Yu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xinhua Cheng</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Z">Zhenyu Tang</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Junwu Zhang</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+L">Li Yuan</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Yonghong Tian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Compared to frame-based methods, computational neuromorphic imaging using event cameras offers significant advantages, such as minimal motion blur, enhanced temporal resolution, and high dynamic range. The multi-view consistency of Neural Radiance Fields, combined with the unique benefits of event cameras, has spurred recent research into reconstructing NeRF from data captured by moving event cameras. While showing impressive performance, existing methods rely on ideal conditions, with the availability of uniform and high-quality event sequences and accurate camera poses, and mainly focus on object-level reconstruction, thus limiting their practical applications. In this work, we propose AE-NeRF to address the challenges of learning event-based NeRF under non-ideal conditions, including non-uniform event sequences, noisy poses, and scenes of various scales. Our method exploits the density of event streams and jointly learns a pose correction module with an event-based NeRF (e-NeRF) framework for robust 3D reconstruction from inaccurate camera poses. To generalize to larger scenes, we propose hierarchical event distillation, with a proposal e-NeRF network and a vanilla e-NeRF network to resample and refine the reconstruction process. We further propose an event reconstruction loss and a temporal loss to improve the view consistency of the reconstructed scene. We established a comprehensive benchmark that includes large-scale scenes to simulate practical non-ideal conditions, incorporating both synthetic and challenging real-world event datasets. The experimental results show that our method achieves a new state-of-the-art in event-based 3D reconstruction. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.02116">arXiv:2501.02116</a> <span> [<a href="https://arxiv.org/pdf/2501.02116">pdf</a>, <a href="https://arxiv.org/format/2501.02116">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Humanoid Locomotion and Manipulation: Current Progress and Challenges in Control, Planning, and Learning </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gu%2C+Z">Zhaoyuan Gu</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Junheng Li</a>, <a href="/search/cs?searchtype=author&query=Shen%2C+W">Wenlan Shen</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+W">Wenhao Yu</a>, <a href="/search/cs?searchtype=author&query=Xie%2C+Z">Zhaoming Xie</a>, <a href="/search/cs?searchtype=author&query=McCrory%2C+S">Stephen McCrory</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xianyi Cheng</a>, <a href="/search/cs?searchtype=author&query=Shamsah%2C+A">Abdulaziz Shamsah</a>, <a href="/search/cs?searchtype=author&query=Griffin%2C+R">Robert Griffin</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C+K">C. Karen Liu</a>, <a href="/search/cs?searchtype=author&query=Kheddar%2C+A">Abderrahmane Kheddar</a>, <a href="/search/cs?searchtype=author&query=Peng%2C+X+B">Xue Bin Peng</a>, <a href="/search/cs?searchtype=author&query=Zhu%2C+Y">Yuke Zhu</a>, <a href="/search/cs?searchtype=author&query=Shi%2C+G">Guanya Shi</a>, <a href="/search/cs?searchtype=author&query=Nguyen%2C+Q">Quan Nguyen</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+G">Gordon Cheng</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+H">Huijun Gao</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Y">Ye Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Humanoid robots have great potential to perform various human-level skills. These skills involve locomotion, manipulation, and cognitive capabilities. Driven by advances in machine learning and the strength of existing model-based approaches, these capabilities have progressed rapidly, but often separately. Therefore, a timely overview of current progress and future trends in this fast-evolving field is essential. This survey first summarizes the model-based planning and control that have been the backbone of humanoid robotics for the past three decades. We then explore emerging learning-based methods, with a focus on reinforcement learning and imitation learning that enhance the versatility of loco-manipulation skills. We examine the potential of integrating foundation models with humanoid embodiments, assessing the prospects for developing generalist humanoid agents. In addition, this survey covers emerging research on whole-body tactile sensing that unlocks new humanoid skills involving physical interaction. The survey concludes with a discussion of the challenges and future trends. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01384">arXiv:2501.01384</a> <span> [<a href="https://arxiv.org/pdf/2501.01384">pdf</a>, <a href="https://arxiv.org/format/2501.01384">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Sound">cs.SD</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Audio and Speech Processing">eess.AS</span> </div> </div> <p class="title is-5 mathjax"> OmniChat: Enhancing Spoken Dialogue Systems with Scalable Synthetic Data for Diverse Scenarios </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xize Cheng</a>, <a href="/search/cs?searchtype=author&query=Fu%2C+D">Dongjie Fu</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+X">Xiaoda Yang</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+M">Minghui Fang</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+R">Ruofan Hu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+J">Jingyu Lu</a>, <a href="/search/cs?searchtype=author&query=Jionghao%2C+B">Bai Jionghao</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zehan Wang</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+S">Shengpeng Ji</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+R">Rongjie Huang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+L">Linjun Li</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Y">Yu Chen</a>, <a href="/search/cs?searchtype=author&query=Jin%2C+T">Tao Jin</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+Z">Zhou Zhao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> With the rapid development of large language models, researchers have created increasingly advanced spoken dialogue systems that can naturally converse with humans. However, these systems still struggle to handle the full complexity of real-world conversations, including audio events, musical contexts, and emotional expressions, mainly because current dialogue datasets are constrained in both scale and scenario diversity. In this paper, we propose leveraging synthetic data to enhance dialogue models across diverse scenarios. We introduce ShareChatX, the first comprehensive, large-scale dataset for spoken dialogue that spans diverse scenarios. Based on this dataset, we introduce OmniChat, a multi-turn dialogue system with a heterogeneous feature fusion module, designed to optimize feature selection in different dialogue contexts. In addition, we explore critical aspects of training dialogue systems using synthetic data. Through comprehensive experimentation, we determine the ideal balance between synthetic and real data, achieving state-of-the-art results on the real-world dialogue dataset DailyTalk. We also highlight the crucial importance of synthetic data in tackling diverse, complex dialogue scenarios, especially those involving audio and music. For more details, please visit our demo page at https://sharechatx.github.io/. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
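<p class="is-size-7">The "balance between synthetic and real data" mentioned in the abstract above can be made concrete with a small sketch. Everything below is invented for illustration; the paper determines its own ratio empirically.</p>
<pre><code class="language-python">import random

def mix_datasets(real, synthetic, synth_fraction=0.5, seed=0):
    """Build a training mixture with a given fraction of synthetic dialogues.
    `synth_fraction` is a hypothetical knob standing in for the balance the
    paper searches for empirically."""
    rng = random.Random(seed)
    n_synth = int(len(real) * synth_fraction / (1 - synth_fraction))
    mixture = list(real) + rng.sample(synthetic, min(n_synth, len(synthetic)))
    rng.shuffle(mixture)
    return mixture

real = [f"real_dialogue_{i}" for i in range(100)]
synthetic = [f"synthetic_dialogue_{i}" for i in range(1000)]
train_set = mix_datasets(real, synthetic, synth_fraction=0.6)
print(len(train_set), train_set[:3])
</code></pre>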
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2501.01306">arXiv:2501.01306</a> <span> [<a href="https://arxiv.org/pdf/2501.01306">pdf</a>, <a href="https://arxiv.org/format/2501.01306">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Think More, Hallucinate Less: Mitigating Hallucinations via Dual Process of Fast and Slow Thinking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xiaoxue Cheng</a>, <a href="/search/cs?searchtype=author&query=Li%2C+J">Junyi Li</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+W+X">Wayne Xin Zhao</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+J">Ji-Rong Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Large language models (LLMs) demonstrate exceptional capabilities, yet still face the hallucination issue. Typical text generation approaches adopt auto-regressive generation without deliberate reasoning, which often results in untrustworthy and factually inaccurate responses. In this paper, we propose HaluSearch, a novel framework that incorporates tree search-based algorithms (e.g., MCTS) to enable an explicit slow-thinking generation process for mitigating hallucinations of LLMs during inference. Specifically, HaluSearch frames text generation as a step-by-step reasoning process, using a self-evaluation reward model to score each generation step and guide the tree search towards the most reliable generation pathway, fully exploiting the internal knowledge of LLMs. To balance efficiency and quality, we introduce a hierarchical thinking system switch mechanism inspired by dual process theory in cognitive science, which dynamically alternates between fast and slow thinking modes at both the instance and step levels, adapting to the complexity of questions and reasoning states. We conduct extensive experiments on both English and Chinese datasets and the results show that our approach significantly outperforms baseline approaches. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li>
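<p class="is-size-7">The step-scored search described in the HaluSearch abstract can be sketched in a few lines. The snippet below is a greatly simplified, greedy best-first variant (the paper uses MCTS), and both stub functions are hypothetical stand-ins for an LLM and a self-evaluation reward model.</p>
<pre><code class="language-python">import random

def generate_candidates(prefix, k=3):
    """Stub for an LLM proposing k candidate next reasoning steps (hypothetical)."""
    return [f"{prefix} step{random.randint(0, 99)}" for _ in range(k)]

def self_eval_reward(path):
    """Stub self-evaluation reward model scoring a partial generation in [0, 1]."""
    return random.random()

def slow_thinking_search(question, max_steps=4, k=3):
    """Greedy best-first variant of step-level search; HaluSearch itself uses MCTS."""
    path = question
    for _ in range(max_steps):
        candidates = generate_candidates(path, k)
        path = max(candidates, key=self_eval_reward)  # keep the most reliable step
    return path

print(slow_thinking_search("Q: capital of France?"))
</code></pre>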
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2501.01306v2-abstract-full').style.display = 'none'; document.getElementById('2501.01306v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 January, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.20891">arXiv:2412.20891</a> <span> [<a href="https://arxiv.org/pdf/2412.20891">pdf</a>, <a href="https://arxiv.org/format/2412.20891">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> DoTA: Weight-Decomposed Tensor Adaptation for Large Language Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hu%2C+X">Xiaolin Hu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xiang Cheng</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+P">Peiyu Liu</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+W">Wei Liu</a>, <a href="/search/cs?searchtype=author&query=Luan%2C+J">Jian Luan</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bin Wang</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yong Liu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.20891v1-abstract-short" style="display: inline;"> Low-rank adaptation (LoRA) reduces the computational and memory demands of fine-tuning large language models (LLMs) by approximating updates with low-rank matrices. However, low-rank approximation in two-dimensional space fails to capture high-dimensional structures within the target matrix. Recently, tensor decomposition methods have been explored for fine-tuning LLMs, leveraging their ability to… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20891v1-abstract-full').style.display = 'inline'; document.getElementById('2412.20891v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.20891v1-abstract-full" style="display: none;"> Low-rank adaptation (LoRA) reduces the computational and memory demands of fine-tuning large language models (LLMs) by approximating updates with low-rank matrices. However, low-rank approximation in two-dimensional space fails to capture high-dimensional structures within the target matrix. Recently, tensor decomposition methods have been explored for fine-tuning LLMs, leveraging their ability to extract structured information. Yet, these approaches primarily rely on random initialization, and the impact of initialization on tensor adaptation remains underexplored. In this paper, we reveal that random initialization significantly diverges from the validation loss achieved by full fine-tuning. 
To address this, we propose Weight-Decomposed Tensor Adaptation (DoTA), which leverages the Matrix Product Operator (MPO) decomposition of pre-trained weights for effective initialization in fine-tuning LLMs. Additionally, we introduce QDoTA, a quantized version of DoTA designed for 4-bit quantization. Experiments on commonsense and arithmetic reasoning tasks show that DoTA outperforms random initialization methods with fewer parameters. QDoTA further reduces memory consumption and achieves comparable performance to DoTA on commonsense reasoning tasks. We will release our code to support future research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.20891v1-abstract-full').style.display = 'none'; document.getElementById('2412.20891v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 6 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.19437">arXiv:2412.19437</a> <span> [<a href="https://arxiv.org/pdf/2412.19437">pdf</a>, <a href="https://arxiv.org/format/2412.19437">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> DeepSeek-V3 Technical Report </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=DeepSeek-AI"> DeepSeek-AI</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+A">Aixin Liu</a>, <a href="/search/cs?searchtype=author&query=Feng%2C+B">Bei Feng</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+B">Bing Xue</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bingxuan Wang</a>, <a href="/search/cs?searchtype=author&query=Wu%2C+B">Bochao Wu</a>, <a href="/search/cs?searchtype=author&query=Lu%2C+C">Chengda Lu</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+C">Chenggang Zhao</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+C">Chengqi Deng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+C">Chenyu Zhang</a>, <a href="/search/cs?searchtype=author&query=Ruan%2C+C">Chong Ruan</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+D">Damai Dai</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+D">Daya Guo</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+D">Dejian Yang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+D">Deli Chen</a>, <a href="/search/cs?searchtype=author&query=Ji%2C+D">Dongjie Ji</a>, <a href="/search/cs?searchtype=author&query=Li%2C+E">Erhang Li</a>, <a href="/search/cs?searchtype=author&query=Lin%2C+F">Fangyun Lin</a>, <a href="/search/cs?searchtype=author&query=Dai%2C+F">Fucong Dai</a>, <a href="/search/cs?searchtype=author&query=Luo%2C+F">Fuli Luo</a>, <a href="/search/cs?searchtype=author&query=Hao%2C+G">Guangbo Hao</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+G">Guanting Chen</a>, <a 
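<p class="is-size-7">The contrast between random and decomposition-based initialization in the DoTA abstract can be illustrated numerically. The sketch below uses truncated SVD as a simple stand-in for the MPO decomposition named in the abstract (MPO factorizes into a chain of tensors and is more involved); the point it makes is the same: factors derived from the pretrained weight start close to the pretrained function, while random factors do not.</p>
<pre><code class="language-python">import numpy as np

rng = np.random.default_rng(0)
d_out, d_in, r = 64, 48, 8
W_pre = rng.standard_normal((d_out, d_in))  # toy "pretrained" weight

# Random init (LoRA-style): A is small noise, B is zero, so training starts blind.
A_rand = rng.standard_normal((r, d_in)) * 0.01
B_rand = np.zeros((d_out, r))

# Decomposition-based init in the spirit of DoTA: derive trainable factors from
# the pretrained weight itself (SVD here as a stand-in for MPO).
U, s, Vt = np.linalg.svd(W_pre, full_matrices=False)
B_init = U[:, :r] * np.sqrt(s[:r])
A_init = np.sqrt(s[:r])[:, None] * Vt[:r]

# The rank-r reconstruction already captures much of W_pre's structure.
err = np.linalg.norm(W_pre - B_init @ A_init) / np.linalg.norm(W_pre)
print(f"relative error of rank-{r} init: {err:.3f}")
</code></pre>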
href="/search/cs?searchtype=author&query=Li%2C+G">Guowei Li</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">H. Zhang</a>, <a href="/search/cs?searchtype=author&query=Bao%2C+H">Han Bao</a> , et al. (175 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.19437v2-abstract-short" style="display: inline;"> We present DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. To achieve efficient inference and cost-effective training, DeepSeek-V3 adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures, which were thoroughly validated in DeepSeek-V2. Furthermore, DeepSeek-V3 pioneers an auxiliary-loss-free strategy for loa… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19437v2-abstract-full').style.display = 'inline'; document.getElementById('2412.19437v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.19437v2-abstract-full" style="display: none;"> We present DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. To achieve efficient inference and cost-effective training, DeepSeek-V3 adopts Multi-head Latent Attention (MLA) and DeepSeekMoE architectures, which were thoroughly validated in DeepSeek-V2. Furthermore, DeepSeek-V3 pioneers an auxiliary-loss-free strategy for load balancing and sets a multi-token prediction training objective for stronger performance. We pre-train DeepSeek-V3 on 14.8 trillion diverse and high-quality tokens, followed by Supervised Fine-Tuning and Reinforcement Learning stages to fully harness its capabilities. Comprehensive evaluations reveal that DeepSeek-V3 outperforms other open-source models and achieves performance comparable to leading closed-source models. Despite its excellent performance, DeepSeek-V3 requires only 2.788M H800 GPU hours for its full training. In addition, its training process is remarkably stable. Throughout the entire training process, we did not experience any irrecoverable loss spikes or perform any rollbacks. The model checkpoints are available at https://github.com/deepseek-ai/DeepSeek-V3. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.19437v2-abstract-full').style.display = 'none'; document.getElementById('2412.19437v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18770">arXiv:2412.18770</a> <span> [<a href="https://arxiv.org/pdf/2412.18770">pdf</a>, <a href="https://arxiv.org/format/2412.18770">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> Attack-in-the-Chain: Bootstrapping Large Language Models for Attacks Against Black-box Neural Ranking Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yu-An Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Jiafeng Guo</a>, <a href="/search/cs?searchtype=author&query=de+Rijke%2C+M">Maarten de Rijke</a>, <a href="/search/cs?searchtype=author&query=Fan%2C+Y">Yixing Fan</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xueqi Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18770v1-abstract-short" style="display: inline;"> Neural ranking models (NRMs) have been shown to be highly effective in terms of retrieval performance. Unfortunately, they have also displayed a higher degree of sensitivity to attacks than previous generation models. To help expose and address this lack of robustness, we introduce a novel ranking attack framework named Attack-in-the-Chain, which tracks interactions between large language models (… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18770v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18770v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18770v1-abstract-full" style="display: none;"> Neural ranking models (NRMs) have been shown to be highly effective in terms of retrieval performance. Unfortunately, they have also displayed a higher degree of sensitivity to attacks than previous generation models. To help expose and address this lack of robustness, we introduce a novel ranking attack framework named Attack-in-the-Chain, which tracks interactions between large language models (LLMs) and NRMs based on chain-of-thought (CoT) prompting to generate adversarial examples under black-box settings. Our approach starts by identifying anchor documents with higher ranking positions than the target document as nodes in the reasoning chain. We then dynamically assign the number of perturbation words to each node and prompt LLMs to execute attacks. Finally, we verify the attack performance of all nodes at each reasoning step and proceed to generate the next reasoning step. Empirical results on two web search benchmarks show the effectiveness of our method. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18770v1-abstract-full').style.display = 'none'; document.getElementById('2412.18770v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
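<p class="is-size-7">The "671B total parameters, 37B activated per token" figures above follow from how MoE routing works in general: each token is sent to only a few experts, so most parameters sit idle on any given forward pass. The sketch below is a generic top-k MoE router for intuition only; it is not DeepSeek-V3's architecture (which additionally uses MLA and an auxiliary-loss-free balancing scheme), and all sizes are toys.</p>
<pre><code class="language-python">import numpy as np

rng = np.random.default_rng(0)
n_experts, d, k = 8, 16, 2  # toy sizes; DeepSeek-V3 is vastly larger

router = rng.standard_normal((d, n_experts))
experts = [rng.standard_normal((d, d)) for _ in range(n_experts)]

def moe_forward(x):
    """Route a token to its top-k experts and mix their outputs by gate weight."""
    logits = x @ router
    top = np.argsort(logits)[-k:]                       # indices of the k best experts
    gates = np.exp(logits[top]) / np.exp(logits[top]).sum()
    return sum(g * (x @ experts[i]) for g, i in zip(gates, top))

token = rng.standard_normal(d)
print(moe_forward(token).shape)  # (16,) -- only 2 of the 8 experts were computed
</code></pre>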
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by AAAI25</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.18768">arXiv:2412.18768</a> <span> [<a href="https://arxiv.org/pdf/2412.18768">pdf</a>, <a href="https://arxiv.org/ps/2412.18768">ps</a>, <a href="https://arxiv.org/format/2412.18768">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Information Retrieval">cs.IR</span> </div> </div> <p class="title is-5 mathjax"> On the Robustness of Generative Information Retrieval Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Liu%2C+Y">Yu-An Liu</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+R">Ruqing Zhang</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Jiafeng Guo</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+C">Changjiang Zhou</a>, <a href="/search/cs?searchtype=author&query=de+Rijke%2C+M">Maarten de Rijke</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xueqi Cheng</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.18768v1-abstract-short" style="display: inline;"> Generative information retrieval methods retrieve documents by directly generating their identifiers. Much effort has been devoted to developing effective generative IR models. Less attention has been paid to the robustness of these models. It is critical to assess the out-of-distribution (OOD) generalization of generative IR models, i.e., how would such models generalize to new distributions? To… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18768v1-abstract-full').style.display = 'inline'; document.getElementById('2412.18768v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.18768v1-abstract-full" style="display: none;"> Generative information retrieval methods retrieve documents by directly generating their identifiers. Much effort has been devoted to developing effective generative IR models. Less attention has been paid to the robustness of these models. It is critical to assess the out-of-distribution (OOD) generalization of generative IR models, i.e., how would such models generalize to new distributions? To answer this question, we focus on OOD scenarios from four perspectives in retrieval problems: (i)query variations; (ii)unseen query types; (iii)unseen tasks; and (iv)corpus expansion. Based on this taxonomy, we conduct empirical studies to analyze the OOD robustness of representative generative IR models against dense retrieval models. Our empirical results indicate that the OOD robustness of generative IR models is in need of improvement. By inspecting the OOD robustness of generative IR models we aim to contribute to the development of more reliable IR models. The code is available at \url{https://github.com/Davion-Liu/GR_OOD}. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.18768v1-abstract-full').style.display = 'none'; document.getElementById('2412.18768v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ECIR 2025. arXiv admin note: substantial text overlap with arXiv:2306.12756</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16778">arXiv:2412.16778</a> <span> [<a href="https://arxiv.org/pdf/2412.16778">pdf</a>, <a href="https://arxiv.org/format/2412.16778">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RoomPainter: View-Integrated Diffusion for Consistent Indoor Scene Texturing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhipeng Huang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+W">Wangbo Yu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xinhua Cheng</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+C">ChengShu Zhao</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+Y">Yunyang Ge</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+M">Mingyi Guo</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+L">Li Yuan</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Yonghong Tian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.16778v1-abstract-short" style="display: inline;"> Indoor scene texture synthesis has garnered significant interest due to its important potential applications in virtual reality, digital media, and creative arts. Existing diffusion model-based researches either rely on per-view inpainting techniques, which are plagued by severe cross-view inconsistencies and conspicuous seams, or they resort to optimization-based approaches that entail substantia… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.16778v1-abstract-full').style.display = 'inline'; document.getElementById('2412.16778v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.16778v1-abstract-full" style="display: none;"> Indoor scene texture synthesis has garnered significant interest due to its important potential applications in virtual reality, digital media, and creative arts. Existing diffusion model-based researches either rely on per-view inpainting techniques, which are plagued by severe cross-view inconsistencies and conspicuous seams, or they resort to optimization-based approaches that entail substantial computational overhead. In this work, we present RoomPainter, a framework that seamlessly integrates efficiency and consistency to achieve high-fidelity texturing of indoor scenes. 
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.16778">arXiv:2412.16778</a> <span> [<a href="https://arxiv.org/pdf/2412.16778">pdf</a>, <a href="https://arxiv.org/format/2412.16778">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> RoomPainter: View-Integrated Diffusion for Consistent Indoor Scene Texturing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhipeng Huang</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+W">Wangbo Yu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xinhua Cheng</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+C">ChengShu Zhao</a>, <a href="/search/cs?searchtype=author&query=Ge%2C+Y">Yunyang Ge</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+M">Mingyi Guo</a>, <a href="/search/cs?searchtype=author&query=Yuan%2C+L">Li Yuan</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+Y">Yonghong Tian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Indoor scene texture synthesis has garnered significant interest due to its potential applications in virtual reality, digital media, and creative arts. Existing diffusion model-based research either relies on per-view inpainting techniques, which are plagued by severe cross-view inconsistencies and conspicuous seams, or resorts to optimization-based approaches that entail substantial computational overhead. In this work, we present RoomPainter, a framework that seamlessly integrates efficiency and consistency to achieve high-fidelity texturing of indoor scenes. The core of RoomPainter is a zero-shot technique that effectively adapts a 2D diffusion model for 3D-consistent texture synthesis, along with a two-stage generation strategy that ensures both global and local consistency. Specifically, we introduce Attention-Guided Multi-View Integrated Sampling (MVIS) combined with a neighbor-integrated attention mechanism for zero-shot texture map generation. Using MVIS, we first generate a texture map for the entire room to ensure global consistency, then adopt its variant, Attention-Guided Multi-View Integrated Repaint Sampling (MVRS), to repaint individual instances within the room, thereby further enhancing local consistency. Experiments demonstrate that RoomPainter achieves superior performance for indoor scene texture synthesis in visual quality, global consistency, and generation efficiency. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.14461">arXiv:2412.14461</a> <span> [<a href="https://arxiv.org/pdf/2412.14461">pdf</a>, <a href="https://arxiv.org/format/2412.14461">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> From Human Annotation to LLMs: SILICON Annotation Workflow for Management Research </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xiang Cheng</a>, <a href="/search/cs?searchtype=author&query=Mayya%2C+R">Raveesh Mayya</a>, <a href="/search/cs?searchtype=author&query=Sedoc%2C+J">João Sedoc</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Unstructured text data annotation and analysis are fundamental to management research, often relying on human annotators through crowdsourcing platforms. While Large Language Models (LLMs) promise to provide a cost-effective and efficient alternative to human annotation, there is no systematic workflow for evaluating when LLMs are suitable or how to proceed with LLM-based text annotation in a reproducible manner. This paper addresses this methodological gap by introducing the "SILICON" (Systematic Inference with LLMs for Information Classification and Notation) workflow. The workflow integrates established principles of human annotation with systematic prompt optimization and model selection, addressing challenges such as developing robust annotation guidelines, establishing high-quality human baselines, optimizing prompts, and ensuring reproducibility across LLMs. We validate the SILICON workflow through seven case studies covering common management research tasks, including business proposal evaluation, dialog intent and breakdown analysis, and review attribute detection. Our findings highlight the importance of validating annotation guideline agreement, the superiority of expert-developed human baselines over crowdsourced ones, the iterative nature of prompt optimization, and the necessity of testing multiple LLMs. Notably, we propose a regression-based methodology to empirically compare LLM outputs across prompts and models. Our workflow advances management research by establishing reproducible processes for LLM-based annotation that maintain scientific rigor. We provide practical guidance for researchers to navigate the evolving landscape of generative AI tools effectively while maintaining transparency and reproducibility. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li>
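<p class="is-size-7">The "regression-based methodology to empirically compare LLM outputs across prompts and models" mentioned in the SILICON abstract can be sketched with indicator variables. All data, model names, and scores below are invented for illustration; the fitted coefficients estimate how much each model or prompt version shifts agreement with the human baseline.</p>
<pre><code class="language-python">import numpy as np

# Hypothetical agreement-with-expert-baseline scores per (model, prompt) run.
runs = [  # (model, prompt_version, accuracy vs. expert baseline)
    ("gpt", "v1", 0.81), ("gpt", "v2", 0.86),
    ("llama", "v1", 0.74), ("llama", "v2", 0.79),
]
models = sorted({m for m, _, _ in runs})
prompts = sorted({p for _, p, _ in runs})

# Design matrix: intercept + model dummies + prompt dummies (first level dropped).
X = np.array([[1.0] + [m == mm for mm in models[1:]] + [p == pp for pp in prompts[1:]]
              for m, p, _ in runs], dtype=float)
y = np.array([acc for _, _, acc in runs])

coef, *_ = np.linalg.lstsq(X, y, rcond=None)
print(dict(zip(["intercept"] + models[1:] + prompts[1:], coef.round(3))))
</code></pre>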
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13196v1-abstract-full').style.display = 'none'; document.getElementById('2412.13196v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">website: https://exbody2.github.io</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.13061">arXiv:2412.13061</a> <span> [<a href="https://arxiv.org/pdf/2412.13061">pdf</a>, <a href="https://arxiv.org/format/2412.13061">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> VidTok: A Versatile and Open-Source Video Tokenizer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+A">Anni Tang</a>, <a href="/search/cs?searchtype=author&query=He%2C+T">Tianyu He</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Junliang Guo</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xinle Cheng</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Li Song</a>, <a href="/search/cs?searchtype=author&query=Bian%2C+J">Jiang Bian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.13061v1-abstract-short" style="display: inline;"> Encoding video content into compact latent tokens has become a fundamental step in video generation and understanding, driven by the need to address the inherent redundancy in pixel-level representations. Consequently, there is a growing demand for high-performance, open-source video tokenizers as video-centric research gains prominence. We introduce VidTok, a versatile video tokenizer that delive… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.13061v1-abstract-full').style.display = 'inline'; document.getElementById('2412.13061v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.13061v1-abstract-full" style="display: none;"> Encoding video content into compact latent tokens has become a fundamental step in video generation and understanding, driven by the need to address the inherent redundancy in pixel-level representations. Consequently, there is a growing demand for high-performance, open-source video tokenizers as video-centric research gains prominence. We introduce VidTok, a versatile video tokenizer that delivers state-of-the-art performance in both continuous and discrete tokenizations. 
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.13061">arXiv:2412.13061</a> <span> [<a href="https://arxiv.org/pdf/2412.13061">pdf</a>, <a href="https://arxiv.org/format/2412.13061">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> VidTok: A Versatile and Open-Source Video Tokenizer </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tang%2C+A">Anni Tang</a>, <a href="/search/cs?searchtype=author&query=He%2C+T">Tianyu He</a>, <a href="/search/cs?searchtype=author&query=Guo%2C+J">Junliang Guo</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xinle Cheng</a>, <a href="/search/cs?searchtype=author&query=Song%2C+L">Li Song</a>, <a href="/search/cs?searchtype=author&query=Bian%2C+J">Jiang Bian</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Encoding video content into compact latent tokens has become a fundamental step in video generation and understanding, driven by the need to address the inherent redundancy in pixel-level representations. Consequently, there is a growing demand for high-performance, open-source video tokenizers as video-centric research gains prominence. We introduce VidTok, a versatile video tokenizer that delivers state-of-the-art performance in both continuous and discrete tokenizations. VidTok incorporates several key advancements over existing approaches: 1) improvements to the model architecture, such as the convolutional layers and up/downsampling modules; 2) the integration of Finite Scalar Quantization (FSQ) into discrete video tokenization, addressing the training instability and codebook collapse commonly associated with conventional Vector Quantization (VQ); 3) improved training strategies, including a two-stage training process and the use of reduced frame rates. By integrating these advancements, VidTok achieves substantial improvements over existing methods, demonstrating superior performance across multiple metrics, including PSNR, SSIM, LPIPS, and FVD, under standardized evaluation settings. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Code & Models: https://github.com/microsoft/VidTok</span> </p> </li>
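<p class="is-size-7">The FSQ idea named in point 2) of the VidTok abstract is simple enough to show directly. Below is a minimal sketch of Finite Scalar Quantization; the level counts are illustrative, not VidTok's configuration. Because the grid is fixed rather than learned, there is no codebook to collapse, which is exactly the failure mode of conventional VQ that the abstract cites.</p>
<pre><code class="language-python">import numpy as np

def fsq(z, levels=(7, 5, 5)):
    """Finite Scalar Quantization sketch: squash each latent channel with tanh,
    then round it onto a small fixed grid of levels[i] values. The implicit
    codebook (7*5*5 = 175 codes here) is fixed, so it cannot collapse the way
    a learned VQ codebook can. Odd level counts keep the rounding exact."""
    half = np.asarray(levels) // 2
    return np.round(np.tanh(z) * half) / half  # channel i takes levels[i] values

latent = np.random.default_rng(0).standard_normal(3)
print(latent.round(3), "->", fsq(latent))
</code></pre>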
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.12550">arXiv:2412.12550</a> <span> [<a href="https://arxiv.org/pdf/2412.12550">pdf</a>, <a href="https://arxiv.org/format/2412.12550">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Consistent Diffusion: Denoising Diffusion Model with Data-Consistent Training for Image Restoration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xinlong Cheng</a>, <a href="/search/cs?searchtype=author&query=Cao%2C+T">Tiantian Cao</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+G">Guoan Cheng</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+B">Bangxuan Huang</a>, <a href="/search/cs?searchtype=author&query=Tian%2C+X">Xinghan Tian</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Y">Ye Wang</a>, <a href="/search/cs?searchtype=author&query=He%2C+X">Xiaoyu He</a>, <a href="/search/cs?searchtype=author&query=Li%2C+W">Weixin Li</a>, <a href="/search/cs?searchtype=author&query=Xue%2C+T">Tianfan Xue</a>, <a href="/search/cs?searchtype=author&query=Dong%2C+X">Xuan Dong</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> In this work, we address the limitations of denoising diffusion models (DDMs) in image restoration tasks, particularly the shape and color distortions that can compromise image quality. While DDMs have demonstrated promising performance in many applications such as text-to-image synthesis, their effectiveness in image restoration is often hindered by these distortions. We observe that these issues arise from inconsistencies between the training and testing data used by DDMs. Based on this observation, we propose a novel training method, named data-consistent training, which allows the DDMs to access images with accumulated errors during training, thereby ensuring that the model learns to correct these errors. Experimental results show that, across five image restoration tasks, our method achieves significant improvements over state-of-the-art methods while effectively minimizing distortions and preserving image fidelity. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li>
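<p class="is-size-7">The train/test inconsistency named in the abstract above is a train-on-clean, test-on-own-errors mismatch, and the fix is to expose the model to its own accumulated errors during training. The toy below illustrates only that exposure idea; the linear "denoiser", sizes, and update rule are all invented and bear no relation to the paper's actual diffusion training.</p>
<pre><code class="language-python">import numpy as np

rng = np.random.default_rng(0)

def model(x, w):
    """Stand-in 'denoiser': a scalar gain. A real DDM is a neural network."""
    return w * x

def train_step(w, clean, lr=0.1, self_loop=True):
    """One toy data-consistent step: instead of always denoising a freshly
    corrupted clean image, also train on the model's own imperfect output, so
    the errors that accumulate at test time are seen during training."""
    noisy = clean + rng.normal(0, 0.1, clean.shape)
    inp = model(noisy, w) if self_loop else noisy  # reuse model output as input
    grad = 2 * np.mean((model(inp, w) - clean) * inp)
    return w - lr * grad

w = 0.2
clean = rng.standard_normal(64)
for _ in range(200):
    w = train_step(w, clean)
print(f"learned gain: {w:.3f}")
</code></pre>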
<li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.09874">arXiv:2412.09874</a> <span> [<a href="https://arxiv.org/pdf/2412.09874">pdf</a>, <a href="https://arxiv.org/format/2412.09874">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Can Students Beyond The Teacher? Distilling Knowledge from Teacher's Bias </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Zhang%2C+J">Jianhua Zhang</a>, <a href="/search/cs?searchtype=author&query=Gao%2C+Y">Yi Gao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+R">Ruyu Liu</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xu Cheng</a>, <a href="/search/cs?searchtype=author&query=Zhang%2C+H">Houxiang Zhang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+S">Shengyong Chen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-full has-text-grey-dark mathjax"> Knowledge distillation (KD) is a model compression technique that transfers knowledge from a large teacher model to a smaller student model to enhance its performance. Existing methods often assume that the student model is inherently inferior to the teacher model. However, we identify that the fundamental issue affecting student performance is the bias transferred by the teacher. Current KD frameworks transmit both right and wrong knowledge, introducing bias that misleads the student model. To address this issue, we propose a novel strategy to rectify bias and greatly improve the student model's performance. Our strategy involves three steps: First, we differentiate knowledge and design a bias elimination method to filter out biases, retaining only the right knowledge for the student model to learn. Next, we propose a bias rectification method to rectify the teacher model's wrong predictions, fundamentally addressing bias interference. The student model learns from both the right knowledge and the rectified biases, greatly improving its prediction accuracy. Additionally, we introduce a dynamic learning approach with a loss function that updates weights dynamically, allowing the student model to quickly learn easy tasks based on right knowledge initially and tackle hard tasks corresponding to biases later, greatly enhancing the student model's learning efficiency. To the best of our knowledge, this is the first strategy enabling the student model to surpass the teacher model. Experiments demonstrate that our strategy, as a plug-and-play module, is versatile across various mainstream KD frameworks. We will release our code after the paper is accepted. </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li>
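<p class="is-size-7">The bias-rectification step described in the abstract above can be pictured with a small sketch: where the teacher's prediction is wrong, the distillation target is replaced rather than imitated. The details below (swapping in a one-hot ground-truth target) are illustrative, not the paper's exact method.</p>
<pre><code class="language-python">import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def debiased_targets(teacher_logits, labels):
    """Sketch of bias rectification: where the teacher's argmax disagrees with
    the ground truth, substitute a one-hot label for the teacher's (biased)
    distribution, so the student never imitates a wrong prediction."""
    probs = softmax(teacher_logits)
    wrong = probs.argmax(axis=1) != labels
    onehot = np.eye(probs.shape[1])[labels]
    probs[wrong] = onehot[wrong]  # rectified targets for the student
    return probs, wrong

rng = np.random.default_rng(0)
logits = rng.standard_normal((4, 3))
labels = np.array([0, 1, 2, 0])
targets, wrong = debiased_targets(logits, labels)
print("teacher wrong on:", wrong)
print("student targets:", targets.round(2))
</code></pre>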
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.09413">arXiv:2412.09413</a> <span> [<a href="https://arxiv.org/pdf/2412.09413">pdf</a>, <a href="https://arxiv.org/format/2412.09413">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Imitate, Explore, and Self-Improve: A Reproduction Report on Slow-thinking Reasoning Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Min%2C+Y">Yingqian Min</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+Z">Zhipeng Chen</a>, <a href="/search/cs?searchtype=author&query=Jiang%2C+J">Jinhao Jiang</a>, <a href="/search/cs?searchtype=author&query=Chen%2C+J">Jie Chen</a>, <a href="/search/cs?searchtype=author&query=Deng%2C+J">Jia Deng</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+Y">Yiwen Hu</a>, <a href="/search/cs?searchtype=author&query=Tang%2C+Y">Yiru Tang</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+J">Jiapeng Wang</a>, <a href="/search/cs?searchtype=author&query=Cheng%2C+X">Xiaoxue Cheng</a>, <a href="/search/cs?searchtype=author&query=Song%2C+H">Huatong Song</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+W+X">Wayne Xin Zhao</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+Z">Zheng Liu</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+Z">Zhongyuan Wang</a>, <a href="/search/cs?searchtype=author&query=Wen%2C+J">Ji-Rong Wen</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.09413v2-abstract-short" style="display: inline;"> Recently, slow-thinking reasoning systems, such as o1, have demonstrated remarkable capabilities in solving complex reasoning tasks. These systems typically engage in an extended thinking process before responding to a query, allowing them to generate more thorough, accurate, and well-reasoned solutions. These systems are primarily developed and maintained by industry, with their core techniques n… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.09413v2-abstract-full').style.display = 'inline'; document.getElementById('2412.09413v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.09413v2-abstract-full" style="display: none;"> Recently, slow-thinking reasoning systems, such as o1, have demonstrated remarkable capabilities in solving complex reasoning tasks. These systems typically engage in an extended thinking process before responding to a query, allowing them to generate more thorough, accurate, and well-reasoned solutions. These systems are primarily developed and maintained by industry, with their core techniques not publicly disclosed. In response, an increasing number of studies from the research community aim to explore the technical foundations underlying these powerful reasoning systems. Building on these prior efforts, this paper presents a reproduction report on implementing o1-like reasoning systems. 
arXiv:2412.09349 [pdf, other] (https://arxiv.org/abs/2412.09349)
Subjects: cs.CV (Computer Vision and Pattern Recognition)
Title: DisPose: Disentangling Pose Guidance for Controllable Human Image Animation
Authors: Hongxiang Li, Yaowei Li, Yuhang Yang, Junjie Cao, Zhihong Zhu, Xuxin Cheng, Long Chen
Abstract: Controllable human image animation aims to generate videos from reference images using driving videos. Because sparse guidance (e.g., a skeleton pose) provides only limited control signals, recent works have attempted to introduce additional dense conditions (e.g., a depth map) to ensure motion alignment. However, such strict dense guidance impairs the quality of the generated video when the body shape of the reference character differs significantly from that of the driving video. In this paper, we present DisPose, which mines more generalizable and effective control signals without additional dense input by disentangling the sparse skeleton pose in human image animation into motion field guidance and keypoint correspondence. Specifically, we generate a dense motion field from a sparse motion field and the reference image, which provides region-level dense guidance while maintaining the generalization of sparse pose control. We also extract diffusion features corresponding to pose keypoints from the reference image; these point features are then transferred to the target pose to provide distinct identity information. To integrate seamlessly into existing models, we propose a plug-and-play hybrid ControlNet that improves the quality and consistency of generated videos while freezing the existing model parameters. Extensive qualitative and quantitative experiments demonstrate the superiority of DisPose over current methods. Code: https://github.com/lihxxx/DisPose.
Submitted 12 December, 2024; v1 submitted 12 December, 2024; originally announced December 2024.
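The step from a sparse to a dense motion field can be pictured as interpolating per-keypoint displacements over the pixel grid. A toy NumPy/SciPy version (DisPose additionally conditions on the reference image, which this sketch omits):

    import numpy as np
    from scipy.interpolate import griddata

    def dense_motion_field(src_kpts, dst_kpts, h, w):
        """src_kpts, dst_kpts: (K, 2) float arrays of (x, y) keypoints in the
        reference and target pose; returns an (h, w, 2) displacement field."""
        flow = dst_kpts - src_kpts                      # per-keypoint motion
        ys, xs = np.mgrid[0:h, 0:w]
        grid = np.stack([xs.ravel(), ys.ravel()], axis=1).astype(float)
        # Interpolate each displacement component over the full pixel grid.
        fx = griddata(src_kpts, flow[:, 0], grid, method="nearest")
        fy = griddata(src_kpts, flow[:, 1], grid, method="nearest")
        return np.stack([fx, fy], axis=1).reshape(h, w, 2)
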
arXiv:2412.08979 [pdf, other] (https://arxiv.org/abs/2412.08979)
Subjects: cs.LG (Machine Learning); cs.CV (Computer Vision and Pattern Recognition)
Title: A Wander Through the Multimodal Landscape: Efficient Transfer Learning via Low-rank Sequence Multimodal Adapter
Authors: Zirun Guo, Xize Cheng, Yangyang Wu, Tao Jin
Abstract: Efficient transfer learning methods such as adapter-based approaches have shown great success in unimodal models and vision-language models. However, existing methods face two main challenges when fine-tuning multimodal models. First, they are designed for vision-language tasks and fail to extend to settings with more than two modalities. Second, they exploit interactions between modalities only to a limited extent and lack efficiency. To address these issues, we propose the loW-rank sequence multimodal adapter (Wander). We first use the outer product to fuse information from different modalities in an effective, element-wise way. For efficiency, we use CP decomposition to factorize tensors into rank-one components, achieving a substantial reduction in parameters. Furthermore, we implement a token-level low-rank decomposition to extract finer-grained features and sequence relationships between modalities. With these designs, Wander enables token-level interactions between sequences of different modalities in a parameter-efficient way. We conduct extensive experiments on datasets with different numbers of modalities, where Wander consistently outperforms state-of-the-art efficient transfer learning methods. The results fully demonstrate the effectiveness, efficiency, and universality of Wander.
Submitted 12 December, 2024; originally announced December 2024.
Comments: Accepted at AAAI 2025
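The outer-product-plus-CP-decomposition trick is the classic low-rank multimodal fusion pattern: rather than materializing the outer product of all modality features and contracting it with a huge weight tensor, each modality is projected by a rank-R factor and the projections are multiplied element-wise. A PyTorch sketch with illustrative dimensions, not Wander's exact adapter:

    import torch
    import torch.nn as nn

    class LowRankFusion(nn.Module):
        def __init__(self, dims, out_dim, rank=8):
            super().__init__()
            # One rank-R factor per modality; together they CP-factorize the
            # weight tensor of the full outer-product (multilinear) map.
            self.factors = nn.ModuleList(
                [nn.Linear(d, rank * out_dim) for d in dims])
            self.rank, self.out_dim = rank, out_dim

        def forward(self, feats):                  # list of (B, d_m) tensors
            fused = None
            for f, layer in zip(feats, self.factors):
                z = layer(f).view(-1, self.rank, self.out_dim)
                fused = z if fused is None else fused * z  # element-wise fuse
            return fused.sum(dim=1)                # sum rank-one components

    # Usage, e.g. three modalities: LowRankFusion([512, 128, 64], out_dim=256)
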
arXiv:2412.07773 [pdf, other] (https://arxiv.org/abs/2412.07773)
Subjects: cs.RO (Robotics); cs.AI (Artificial Intelligence); cs.LG (Machine Learning)
Title: Mobile-TeleVision: Predictive Motion Priors for Humanoid Whole-Body Control
Authors: Chenhao Lu, Xuxin Cheng, Jialong Li, Shiqi Yang, Mazeyu Ji, Chengjing Yuan, Ge Yang, Sha Yi, Xiaolong Wang
Abstract: Humanoid robots require both robust lower-body locomotion and precise upper-body manipulation. While recent Reinforcement Learning (RL) approaches provide whole-body loco-manipulation policies, they lack precise manipulation with high-DoF arms. In this paper, we propose decoupling upper-body control from locomotion: inverse kinematics (IK) and motion retargeting handle precise manipulation, while RL focuses on robust lower-body locomotion. We introduce PMP (Predictive Motion Priors), trained with a Conditional Variational Autoencoder (CVAE) to effectively represent upper-body motions. The locomotion policy is trained conditioned on this upper-body motion representation, ensuring that the system remains robust during both manipulation and locomotion. We show that CVAE features are crucial for stability and robustness, and that our approach significantly outperforms RL-based whole-body control in precise manipulation. With precise upper-body motion and robust lower-body locomotion control, operators can remotely drive the humanoid to walk around and explore different environments while performing diverse manipulation tasks.
Submitted 10 December, 2024; originally announced December 2024.
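A CVAE motion prior of this general kind can be sketched in a few lines: encode a pose (conditioned on context) into a latent distribution, reparameterize, and decode. The architecture and dimensions below are illustrative assumptions, not the PMP design:

    import torch
    import torch.nn as nn

    class MotionCVAE(nn.Module):
        def __init__(self, pose_dim=28, cond_dim=64, latent_dim=16):
            super().__init__()
            self.enc = nn.Sequential(
                nn.Linear(pose_dim + cond_dim, 128), nn.ELU(),
                nn.Linear(128, 2 * latent_dim))     # outputs mu and log-var
            self.dec = nn.Sequential(
                nn.Linear(latent_dim + cond_dim, 128), nn.ELU(),
                nn.Linear(128, pose_dim))

        def forward(self, pose, cond):
            mu, logvar = self.enc(torch.cat([pose, cond], -1)).chunk(2, -1)
            z = mu + torch.randn_like(mu) * (0.5 * logvar).exp()  # reparam.
            recon = self.dec(torch.cat([z, cond], -1))
            return recon, mu, logvar  # train with recon loss + KL(mu, logvar)
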
arXiv:2412.05512 [pdf, other] (https://arxiv.org/abs/2412.05512)
Subjects: cs.NI (Networking and Internet Architecture); cs.CR (Cryptography and Security); cs.DC (Distributed, Parallel, and Cluster Computing)
Title: Partially Synchronous BFT Consensus Made Practical in Wireless Networks
Authors: Shuo Liu, Minghui Xu, Yuezhou Zheng, Yifei Zou, Wangjie Qiu, Gang Qu, Xiuzhen Cheng
Abstract: Consensus is becoming increasingly important in wireless networks. Partially synchronous BFT consensus, a significant branch of consensus, has made considerable progress in wired networks. However, implementing it in wireless networks, especially in dynamic ad hoc wireless networks, remains challenging. Existing wireless synchronous consensus protocols, despite being well developed, are not readily adaptable to partially synchronous settings. Additionally, reliable communication, a cornerstone of BFT consensus, can lead to high message and time complexity in wireless networks. To address these challenges, we propose a wireless communication protocol called ReduceCatch (Reduce and Catch) that supports reliable 1-to-N, N-to-1, and N-to-N communication. We employ ReduceCatch to tailor three partially synchronous BFT consensus protocols (PBFT, Tendermint, and HotStuff) for seamless adaptation from wired to ad hoc wireless networks. To evaluate the performance of the ReduceCatch-enabled consensus protocols, we develop a three-layer wireless consensus testbed, on which we implement 20 distinct consensus protocols and measure their latency and throughput. The experimental results demonstrate the superiority of the ReduceCatch-based consensus protocols in terms of latency and throughput.
Submitted 6 December, 2024; originally announced December 2024.
Comments: Accepted to IEEE INFOCOM 2025, 10 pages, 7 figures
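For background, the three protocols named here share the standard partially synchronous quorum arithmetic: n >= 3f + 1 replicas tolerate f Byzantine faults, and 2f + 1 matching votes form a quorum. A one-function reminder (textbook BFT, not taken from the paper):

    def bft_thresholds(n):
        """Max Byzantine faults f and quorum size for n replicas."""
        f = (n - 1) // 3
        return f, 2 * f + 1

    # e.g. bft_thresholds(4) == (1, 3): four nodes tolerate one fault,
    # and three matching votes commit a decision.
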
arXiv:2412.05502 [pdf, other] (https://arxiv.org/abs/2412.05502)
Subjects: cs.CR (Cryptography and Security)
Title: EC-Chain: Cost-Effective Storage Solution for Permissionless Blockchains
Authors: Minghui Xu, Hechuan Guo, Ye Cheng, Chunchi Liu, Dongxiao Yu, Xiuzhen Cheng
Abstract: Permissionless blockchains face considerable challenges from increasing storage demands, driven by the proliferation of Decentralized Applications (DApps). This paper introduces EC-Chain, a cost-effective storage solution for permissionless blockchains. EC-Chain reduces the storage overhead of ledger and state data, which together comprise blockchain data. For ledger data, EC-Chain refines existing erasure-coding-based storage optimization techniques by incorporating batch encoding and height-based encoding. We also introduce an easy-to-implement dual-trie state management system that enhances state storage and retrieval through state expiry, mining, and creation procedures. To ensure data availability in permissionless environments, EC-Chain introduces a network maintenance scheme tailored to dynamism. Collectively, these contributions allow EC-Chain to provide an effective solution to the storage challenges faced by permissionless blockchains. Our evaluation demonstrates that EC-Chain can achieve a storage reduction of over 90% compared to native Ethereum Geth.
Submitted 6 December, 2024; originally announced December 2024.
Comments: Accepted to IEEE INFOCOM 2025, 10 pages, 9 figures
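The storage saving from erasure-coded ledger data follows from simple arithmetic: under a (k, m) code, a block is split into k data chunks plus m parity chunks, any k of which reconstruct it, so a node storing one chunk holds roughly 1/k of the block. A back-of-the-envelope helper (parameters illustrative; EC-Chain's batch and height-based encoding are more involved):

    def ec_storage(block_mb, k, m):
        """Per-node chunk size and network-wide total under a (k, m) code."""
        chunk = block_mb / k              # each node keeps one chunk
        total = chunk * (k + m)           # all data + parity chunks combined
        return chunk, total

    # Full replication of a 1 MB block across 6 nodes stores 6 MB in total;
    # a (4, 2) code stores 1.5 MB, and each node holds only 0.25 MB.
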
arXiv:2412.00131 [pdf, other] (https://arxiv.org/abs/2412.00131)
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
Title: Open-Sora Plan: Open-Source Large Video Generation Model
Authors: Bin Lin, Yunyang Ge, Xinhua Cheng, Zongjian Li, Bin Zhu, Shaodong Wang, Xianyi He, Yang Ye, Shenghai Yuan, Liuhan Chen, Tanghui Jia, Junwu Zhang, Zhenyu Tang, Yatian Pang, Bin She, Cen Yan, Zhiheng Hu, Xiaoyi Dong, Lin Chen, Zhang Pan, Xing Zhou, Shaoling Dong, Yonghong Tian, Li Yuan
Abstract: We introduce Open-Sora Plan, an open-source project that aims to contribute a large generation model for producing high-resolution, long-duration videos from various user inputs. Our project comprises multiple components covering the entire video generation process, including a Wavelet-Flow Variational Autoencoder, a Joint Image-Video Skiparse Denoiser, and various condition controllers. Moreover, we design many assistant strategies for efficient training and inference, and propose a multi-dimensional data curation pipeline for obtaining high-quality data. Benefiting from these efficient designs, Open-Sora Plan achieves impressive video generation results in both qualitative and quantitative evaluations. We hope our careful design and practical experience can inspire the video generation research community. All our code and model weights are publicly available at https://github.com/PKU-YuanGroup/Open-Sora-Plan.
Submitted 28 November, 2024; originally announced December 2024.
Comments: v1.3
arXiv:2411.18230 [pdf, other] (https://arxiv.org/abs/2411.18230)
Subjects: cs.AI (Artificial Intelligence); cs.RO (Robotics)
Title: Dependency-Aware CAV Task Scheduling via Diffusion-Based Reinforcement Learning
Authors: Xiang Cheng, Zhi Mao, Ying Wang, Wen Wu
Abstract: In this paper, we propose a novel dependency-aware task scheduling strategy for dynamic unmanned-aerial-vehicle-assisted connected autonomous vehicles (CAVs). Specifically, computation tasks of CAVs, each consisting of multiple dependent subtasks, are judiciously assigned to nearby CAVs or the base station so that tasks complete promptly. We therefore formulate a joint scheduling-priority and subtask-assignment optimization problem with the objective of minimizing the average task completion time. The problem targets long-term system performance and is reformulated as a Markov decision process. To solve it, we propose a diffusion-based reinforcement learning algorithm, named Synthetic-DDQN-based Subtask Scheduling, which can make adaptive task scheduling decisions in real time. A diffusion-model-based synthetic experience replay is integrated into the reinforcement learning framework: it generates abundant synthetic transitions for the replay buffer, thereby significantly accelerating convergence and improving sample efficiency. Simulation results demonstrate the effectiveness of the proposed algorithm in reducing task completion time compared to benchmark schemes.
Submitted 27 November, 2024; originally announced November 2024.
Comments: 6 pages, 5 figures
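Synthetic experience replay boils down to mixing generated transitions into the minibatches drawn from the real replay buffer. A minimal sketch in which generator stands in for a trained diffusion model that samples plausible (s, a, r, s') transitions; the mix ratio is an illustrative assumption:

    import random

    class MixedReplayBuffer:
        def __init__(self, capacity, generator=None):
            self.real, self.capacity, self.generator = [], capacity, generator

        def add(self, transition):
            self.real.append(transition)
            if len(self.real) > self.capacity:
                self.real.pop(0)                 # drop oldest real transition

        def sample(self, batch_size, synth_ratio=0.5):
            # Pad each batch with synthetic transitions to improve sample
            # efficiency, as in diffusion-based synthetic experience replay.
            n_synth = int(batch_size * synth_ratio) if self.generator else 0
            batch = random.sample(self.real, batch_size - n_synth)
            batch += [self.generator() for _ in range(n_synth)]
            return batch
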
arXiv:2411.17558 [pdf, other] (https://arxiv.org/abs/2411.17558)
Subjects: cs.CL (Computation and Language); cs.CV (Computer Vision and Pattern Recognition)
Title: Natural Language Understanding and Inference with MLLM in Visual Question Answering: A Survey
Authors: Jiayi Kuang, Jingyou Xie, Haohao Luo, Ronghao Li, Zhe Xu, Xianfeng Cheng, Yinghui Li, Xika Lin, Ying Shen
Abstract: Visual Question Answering (VQA) is a challenging task that combines natural language processing and computer vision techniques, and it has gradually become a benchmark task for multimodal large language models (MLLMs). The goal of our survey is to provide an overview of the development of VQA and a detailed, timely description of the latest models. This survey synthesizes up-to-date work on natural language understanding of images and text, as well as the knowledge-reasoning module based on image-question information, for the core VQA tasks. In addition, we elaborate on recent advances in extracting and fusing modal information with vision-language pretraining models and multimodal large language models in VQA. We also exhaustively review the progress of knowledge reasoning in VQA, detailing both the extraction of internal knowledge and the introduction of external knowledge. Finally, we present VQA datasets and different evaluation metrics, and discuss possible directions for future work.
Submitted 26 November, 2024; originally announced November 2024.
arXiv:2411.17459 [pdf, other] (https://arxiv.org/abs/2411.17459)
Subjects: cs.CV (Computer Vision and Pattern Recognition); cs.AI (Artificial Intelligence)
Title: WF-VAE: Enhancing Video VAE by Wavelet-Driven Energy Flow for Latent Video Diffusion Model
Authors: Zongjian Li, Bin Lin, Yang Ye, Liuhan Chen, Xinhua Cheng, Shenghai Yuan, Li Yuan
Abstract: A video Variational Autoencoder (VAE) encodes videos into a low-dimensional latent space and has become a key component of most Latent Video Diffusion Models (LVDMs), reducing model training costs. However, as the resolution and duration of generated videos increase, the encoding cost of video VAEs becomes a limiting bottleneck in training LVDMs. Moreover, the block-wise inference method adopted by most LVDMs can lead to discontinuities in the latent space when processing long videos. The key to addressing the computational bottleneck lies in decomposing videos into distinct components and efficiently encoding the critical information. Since the wavelet transform can decompose videos into multiple frequency-domain components and improve efficiency significantly, we propose Wavelet Flow VAE (WF-VAE), an autoencoder that leverages a multi-level wavelet transform to facilitate the flow of low-frequency energy into the latent representation. Furthermore, we introduce a method called Causal Cache, which maintains the integrity of the latent space during block-wise inference. Compared to state-of-the-art video VAEs, WF-VAE demonstrates superior performance in PSNR and LPIPS metrics, achieving 2x higher throughput and 4x lower memory consumption while maintaining competitive reconstruction quality. Our code and models are available at https://github.com/PKU-YuanGroup/WF-VAE.
Submitted 27 November, 2024; v1 submitted 26 November, 2024; originally announced November 2024.
Comments: 8 pages, 7 figures
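The low-frequency component that such a wavelet pyramid routes into the latent path can be approximated, at one level and up to normalization, by a 2x2x2 average over time and space (the Haar low-pass). A toy PyTorch illustration of that single step, not the paper's multi-level transform:

    import torch
    import torch.nn.functional as F

    def haar_lowpass_3d(video):
        """Level-1 low-frequency band over (time, height, width).

        video: (B, C, T, H, W) with even T, H, W. The Haar approximation
        coefficients equal this 2x2x2 average up to a constant factor;
        the remaining subbands come from the corresponding differences.
        """
        return F.avg_pool3d(video, kernel_size=2, stride=2)
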
arXiv:2411.15875 [pdf] (https://arxiv.org/abs/2411.15875)
DOI: 10.1007/s10726-024-09911-y (https://doi.org/10.1007/s10726-024-09911-y)
Subjects: cs.CY (Computers and Society); cs.HC (Human-Computer Interaction)
Title: Achieving employees agile response in e-governance: Exploring the synergy of technology and group collaboration
Authors: Ying Bao, Xusen Cheng, Linlin Su, Alex Zarifis
Abstract: The transformation of technology and collaboration methods driven by e-government systems forces government employees to reconsider their daily workflow and their collaboration with colleagues. Despite extensive existing knowledge of technology usage and collaboration, there are limits to explaining, from the perspective of government employees, how technology usage and group collaboration combine to achieve agile response, particularly in the e-government setting. To address these challenges, this study provides a holistic understanding of the pathway to agile response in e-governance from the perspective of government employees. It explores a dual path to agile response in e-governance through qualitative analysis of 34 in-depth semi-structured interviews with government employees across several government sectors in China. Employing three rounds of coding and adopting Interpretative Structural Modeling (ISM), the study identifies a five-layer mechanism leading to agile response in e-governance that considers both government employees' technology usage and group collaboration. The findings offer suggestions and implications for achieving agile response in e-governance.
Submitted 24 November, 2024; originally announced November 2024.
ACM Class: H.0; J.0; K.4
Journal ref: Group Decision and Negotiation (2024)
arXiv:2411.14572 [pdf, other] (https://arxiv.org/abs/2411.14572)
Subjects: cs.LG (Machine Learning); cs.CL (Computation and Language)
Title: Towards Knowledge Checking in Retrieval-augmented Generation: A Representation Perspective
Authors: Shenglai Zeng, Jiankun Zhang, Bingheng Li, Yuping Lin, Tianqi Zheng, Dante Everaert, Hanqing Lu, Hui Liu, Hui Liu, Yue Xing, Monica Xiao Cheng, Jiliang Tang
Abstract: Retrieval-Augmented Generation (RAG) systems have shown promise in enhancing the performance of Large Language Models (LLMs). However, these systems face challenges in effectively integrating external knowledge with the LLM's internal knowledge, which often leads to misleading or unhelpful information. This work provides a systematic study of knowledge checking in RAG systems. We conduct a comprehensive analysis of LLM representation behaviors and demonstrate the significance of using representations for knowledge checking. Motivated by these findings, we further develop representation-based classifiers for knowledge filtering. We show substantial improvements in RAG performance, even when dealing with noisy knowledge databases. Our study provides new insights into leveraging LLM representations to enhance the reliability and effectiveness of RAG systems.
Submitted 21 November, 2024; originally announced November 2024.
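A representation-based knowledge filter of the kind described can be prototyped as a light classifier over pooled hidden states of retrieved passages. A scikit-learn sketch; the shapes and the helpful/misleading labeling are illustrative assumptions:

    from sklearn.linear_model import LogisticRegression

    def train_knowledge_filter(reps, labels):
        """reps: (N, d) array of pooled LLM hidden states, one per retrieved
        passage; labels: (N,) with 1 = helpful, 0 = misleading/unhelpful."""
        return LogisticRegression(max_iter=1000).fit(reps, labels)

    # At query time, keep only passages the filter scores as helpful:
    # keep = clf.predict_proba(candidate_reps)[:, 1] > 0.5
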