Information Retrieval
aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/ ></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div><!-- /end mobile-header --> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Information Retrieval</h1> <ul> <li><a href="#item0">New submissions</a></li> <li><a href="#item4">Cross-lists</a></li> <li><a href="#item5">Replacements</a></li> </ul> <p>See <a id="recent-cs.IR" aria-labelledby="recent-cs.IR" href="/list/cs.IR/recent">recent</a> articles</p> <h3>Showing new listings for Wednesday, 19 March 2025</h3> <div class='paging'>Total of 12 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href=/list/cs.IR/new?skip=0&show=1000 rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> <dl id='articles'> <h3>New submissions (showing 3 of 3 entries)</h3> <dt> <a name='item1'>[1]</a> <a href ="/abs/2503.14110" title="Abstract" id="2503.14110"> arXiv:2503.14110 </a> [<a href="/pdf/2503.14110" title="Download PDF" id="pdf-2503.14110" aria-labelledby="pdf-2503.14110">pdf</a>, <a href="https://arxiv.org/html/2503.14110v1" title="View HTML" id="html-2503.14110" aria-labelledby="html-2503.14110" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14110" title="Other formats" id="oth-2503.14110" aria-labelledby="oth-2503.14110">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Comprehensive Survey on Cross-Domain Recommendation: Taxonomy, Progress, and Prospects </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+H">Hao Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cheng,+M">Mingyue Cheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Q">Qi Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jiang,+J">Junzhe Jiang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wang,+X">Xianquan Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+R">Rujiao Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lei,+C">Chenyi Lei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+E">Enhong Chen</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Retrieval (cs.IR)</span> </div> <p class='mathjax'> Recommender systems (RS) have become crucial tools for information 
filtering in various real-world scenarios, and cross-domain recommendation (CDR) has been widely explored in recent years to provide better recommendations in the target domain with the help of other domains. CDR technology has developed rapidly, yet a comprehensive survey summarizing recent work has been lacking. In this paper, we therefore summarize the progress and prospects along the main procedure of CDR, including Cross Domain Relevance, Cross Domain Interaction, Cross Domain Representation Enhancement, and Model Optimization. To help researchers better understand and engage with this field, we also organize the applications and resources, and highlight several important current challenges and future directions of CDR. The surveyed articles are collected at https://github.com/USTCAGI/Awesome-Cross-Domain-Recommendation-Papers-and-Resources.

[2] arXiv:2503.14213 [pdf, html, other]
Title: Rolling Forward: Enhancing LightGCN with Causal Graph Convolution for Credit Bond Recommendation
Authors: Ashraf Ghiye, Baptiste Barreau, Laurent Carlier, Michalis Vazirgiannis
Comments: 8 pages, published in the international conference for AI in Finance (ACM ICAIF'24)
Subjects: Information Retrieval (cs.IR); Machine Learning (cs.LG); Computational Finance (q-fin.CP)

Graph Neural Networks have significantly advanced research in recommender systems over the past few years. These methods typically capture global interests using aggregated past interactions and rely on static embeddings of users and items over extended periods of time. While effective in some domains, these methods fall short in many real-world scenarios, especially in finance, where user interests and item popularity evolve rapidly over time. To address these challenges, we introduce a novel extension to Light Graph Convolutional Network (LightGCN) designed to learn temporal node embeddings that capture dynamic interests. Our approach employs causal convolution to maintain a forward-looking model architecture. By preserving the chronological order of user-item interactions and introducing a dynamic update mechanism for embeddings through a sliding window, the proposed model generates well-timed and contextually relevant recommendations. Extensive experiments on a real-world dataset from BNP Paribas demonstrate that our approach significantly enhances the performance of LightGCN while maintaining the simplicity and efficiency of its architecture. Our findings provide new insights into designing graph-based recommender systems in time-sensitive applications, particularly for financial product recommendations.
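The sliding-window idea can be sketched roughly as follows: run LightGCN-style propagation only over interactions that fall inside a recent time window, so the embeddings used at time t never see future edges. This is a minimal numpy illustration under assumed data shapes, not the authors' implementation.

```python
# Minimal sketch (not the authors' code): LightGCN propagation restricted to a
# sliding time window, so embeddings at time t_now only see past interactions.
import numpy as np

def lightgcn_propagate(edges, n_users, n_items, emb, t_now, window, n_layers=3):
    """edges: list of (user, item, timestamp); emb: (n_users + n_items, d) array."""
    # keep only interactions inside [t_now - window, t_now): the "causal" slice
    active = [(u, i) for u, i, ts in edges if t_now - window <= ts < t_now]
    n = n_users + n_items
    A = np.zeros((n, n))
    for u, i in active:
        A[u, n_users + i] = A[n_users + i, u] = 1.0
    deg = A.sum(axis=1)
    d_inv_sqrt = np.zeros_like(deg)
    nz = deg > 0
    d_inv_sqrt[nz] = deg[nz] ** -0.5
    A_hat = d_inv_sqrt[:, None] * A * d_inv_sqrt[None, :]   # D^-1/2 A D^-1/2
    layers = [emb]
    for _ in range(n_layers):
        layers.append(A_hat @ layers[-1])
    return np.mean(layers, axis=0)   # LightGCN: average of layer outputs, incl. layer 0

# toy usage: 2 users, 3 items, 8-dim embeddings
rng = np.random.default_rng(0)
emb = rng.normal(size=(5, 8))
edges = [(0, 0, 1.0), (0, 2, 2.0), (1, 1, 3.0)]
out = lightgcn_propagate(edges, n_users=2, n_items=3, emb=emb, t_now=3.5, window=2.0)
```

Re-running the propagation as the window slides forward is one simple way to realize the "dynamic update mechanism" the abstract mentions.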
[3] arXiv:2503.14251 [pdf, other]
Title: Towards a Barrier-free GeoQA Portal: Natural Language Interaction with Geospatial Data Using Multi-Agent LLMs and Semantic Search
Authors: Yu Feng, Puzhen Zhang, Guohui Xiao, Linfang Ding, Liqiu Meng
Subjects: Information Retrieval (cs.IR)

A Barrier-Free GeoQA Portal: Enhancing Geospatial Data Accessibility with a Multi-Agent LLM Framework
Geoportals are vital for accessing and analyzing geospatial data, promoting open spatial data sharing and online geo-information management. Designed with GIS-like interaction and layered visualization, they often challenge non-expert users with complex functionalities and overlapping layers that obscure spatial relationships. We propose a GeoQA Portal that uses a multi-agent Large Language Model framework for seamless natural language interaction with geospatial data. Complex queries are broken into subtasks handled by specialized agents, which retrieve relevant geographic data efficiently. Task plans are shown to users to boost transparency. The portal supports default and custom data inputs for flexibility. Semantic search via word-vector similarity aids data retrieval despite imperfect query terms. Case studies, evaluations, and user tests confirm its effectiveness for non-experts, bridging GIS complexity and public access and offering an intuitive solution for future geoportals.
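The semantic-search component can be illustrated with a small self-contained sketch (not the portal's code): match an imperfect user term against layer names by cosine similarity of vectors. A hashed character-trigram vector stands in here for real word embeddings so the example runs without any model.

```python
# Sketch of "semantic search via word-vector similarity" for layer discovery.
# embed() is a toy stand-in for learned word vectors.
import numpy as np

def embed(text, dim=256):
    v = np.zeros(dim)
    s = f"  {text.lower()}  "
    for i in range(len(s) - 2):
        v[hash(s[i:i + 3]) % dim] += 1.0          # hashed character trigrams
    return v / (np.linalg.norm(v) + 1e-9)

def semantic_search(query, layer_names, top_k=3):
    q = embed(query)
    scored = [(float(q @ embed(name)), name) for name in layer_names]
    return sorted(scored, reverse=True)[:top_k]    # highest cosine similarity first

layers = ["public_transport_stops", "bicycle_lanes", "land_use_zoning", "tram_network"]
print(semantic_search("tram stop", layers))        # rough match despite imperfect wording
```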
Cross submissions (showing 1 of 1 entries)

[4] arXiv:2503.14258 (cross-list from cs.CL) [pdf, html, other]
Title: JuDGE: Benchmarking Judgment Document Generation for Chinese Legal System
Authors: Weihang Su, Baoqing Yue, Qingyao Ai, Yiran Hu, Jiaqi Li, Changyue Wang, Kaiyuan Zhang, Yueyue Wu, Yiqun Liu
Subjects: Computation and Language (cs.CL); Artificial Intelligence (cs.AI); Information Retrieval (cs.IR)

This paper introduces JuDGE (Judgment Document Generation Evaluation), a novel benchmark for evaluating the performance of judgment document generation in the Chinese legal system. We define the task as generating a complete legal judgment document from the given factual description of the case. To facilitate this benchmark, we construct a comprehensive dataset consisting of factual descriptions from real legal cases, paired with their corresponding full judgment documents, which serve as the ground truth for evaluating the quality of generated documents. This dataset is further augmented by two external legal corpora that provide additional legal knowledge for the task: one comprising statutes and regulations, and the other consisting of a large collection of past judgment documents. In collaboration with legal professionals, we establish a comprehensive automated evaluation framework to assess the quality of generated judgment documents across various dimensions. We evaluate various baseline approaches, including few-shot in-context learning, fine-tuning, and a multi-source retrieval-augmented generation (RAG) approach, using both general and legal-domain LLMs. The experimental results demonstrate that, while RAG approaches can effectively improve performance on this task, there is still substantial room for further improvement. All code and datasets are available at https://github.com/oneal2000/JuDGE.
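A rough sketch of a multi-source RAG baseline of the kind evaluated here: retrieve from a statute corpus and a past-judgment corpus, then assemble a single generation prompt. The `retrieve` and `llm_generate` names are placeholders, not APIs from the JuDGE repository.

```python
# Hedged sketch of multi-source retrieval-augmented judgment drafting.
def retrieve(corpus, query, k):
    # stand-in retriever: rank documents by naive term overlap with the query
    scored = sorted(corpus, key=lambda doc: -len(set(query.split()) & set(doc.split())))
    return scored[:k]

def judgment_rag(fact_description, statutes, past_judgments, llm_generate, k=3):
    evidence = retrieve(statutes, fact_description, k) + retrieve(past_judgments, fact_description, k)
    prompt = (
        "Relevant statutes and precedents:\n"
        + "\n---\n".join(evidence)
        + "\n\nFacts of the case:\n" + fact_description
        + "\n\nDraft a complete judgment document."
    )
    return llm_generate(prompt)   # caller supplies the general or legal-domain LLM
```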
Replacement submissions (showing 8 of 8 entries)

[5] arXiv:2410.21967 (replaced) [pdf, html, other]
Title: Dual Conditional Diffusion Models for Sequential Recommendation
Authors: Hongtao Huang, Chengkai Huang, Tong Yu, Xiaojun Chang, Wen Hu, Julian McAuley, Lina Yao
Subjects: Information Retrieval (cs.IR); Artificial Intelligence (cs.AI)

Recent advancements in diffusion models have shown promising results in sequential recommendation (SR). Existing approaches predominantly rely on implicit conditional diffusion models, which compress user behaviors into a single representation during the forward diffusion process. While effective to some extent, this oversimplification often leads to the loss of sequential and contextual information, which is critical for understanding user behavior. Moreover, explicit information, such as user-item interactions or sequential patterns, remains underutilized, despite its potential to directly guide the recommendation process and improve precision. However, combining implicit and explicit information is non-trivial, as it requires dynamically integrating these complementary signals while avoiding noise and irrelevant patterns within user behaviors. To address these challenges, we propose Dual Conditional Diffusion Models for Sequential Recommendation (DCRec), which effectively integrates implicit and explicit information by embedding dual conditions into both the forward and reverse diffusion processes. This allows the model to retain valuable sequential and contextual information while leveraging explicit user-item interactions to guide the recommendation process. Specifically, we introduce the Dual Conditional Diffusion Transformer (DCDT), which employs a cross-attention mechanism to dynamically integrate explicit signals throughout the diffusion stages, ensuring contextual understanding and minimizing the influence of irrelevant patterns. This design enables precise and contextually relevant recommendations. Extensive experiments on public benchmark datasets demonstrate that DCRec significantly outperforms state-of-the-art methods in both accuracy and computational efficiency.
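A very rough sketch of the dual-conditioning idea: a denoising module that cross-attends over explicit interaction embeddings while also consuming an implicit summary vector of the sequence. All dimensions and wiring below are illustrative assumptions, not the DCDT architecture.

```python
# Hedged sketch (PyTorch) of a denoising step with dual conditions.
import torch
import torch.nn as nn

class DualConditionDenoiser(nn.Module):
    def __init__(self, d=64, heads=4):
        super().__init__()
        self.cross_attn = nn.MultiheadAttention(d, heads, batch_first=True)
        self.mlp = nn.Sequential(nn.Linear(2 * d, d), nn.GELU(), nn.Linear(d, d))

    def forward(self, x_t, implicit_cond, explicit_seq):
        # x_t: (B, d) noisy target-item embedding at the current diffusion step
        # implicit_cond: (B, d) compressed user-behavior vector (implicit condition)
        # explicit_seq: (B, L, d) embeddings of the raw interaction sequence (explicit condition)
        q = x_t.unsqueeze(1)                                   # (B, 1, d) query
        ctx, _ = self.cross_attn(q, explicit_seq, explicit_seq)
        h = torch.cat([ctx.squeeze(1), implicit_cond], dim=-1)
        return self.mlp(h)                                     # predicted denoised embedding

model = DualConditionDenoiser()
out = model(torch.randn(2, 64), torch.randn(2, 64), torch.randn(2, 5, 64))
```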
[6] arXiv:2301.02457 (replaced) [pdf, html, other]
Title: Better Differentially Private Approximate Histograms and Heavy Hitters using the Misra-Gries Sketch
Authors: Christian Janos Lebeda, Jakub Tětek
Comments: Added content for full version
Subjects: Data Structures and Algorithms (cs.DS); Cryptography and Security (cs.CR); Information Retrieval (cs.IR)

We consider the problem of computing differentially private approximate histograms and heavy hitters in a stream of elements. In the non-private setting, this is often done using the sketch of Misra and Gries [Science of Computer Programming, 1982]. Chan, Li, Shi, and Xu [PETS 2012] describe a differentially private version of the Misra-Gries sketch, but the amount of noise it adds can be large and scales linearly with the size of the sketch; the more accurate the sketch is, the more noise this approach has to add. We present a better mechanism for releasing a Misra-Gries sketch under $(\varepsilon,\delta)$-differential privacy. It adds noise with magnitude independent of the size of the sketch; in fact, the maximum error coming from the noise is the same as the best known in the private non-streaming setting, up to a constant factor. Our mechanism is simple and likely to be practical. We also give a simple post-processing step of the Misra-Gries sketch that does not increase the worst-case error guarantee. It is sufficient to add noise to this new sketch with less than twice the magnitude of the non-streaming setting. This improves on the previous result for $\varepsilon$-differential privacy, where the noise scales linearly with the size of the sketch. Finally, we consider a general setting where users can contribute multiple distinct elements. We present a new sketch with maximum error matching the Misra-Gries sketch. For many parameters in this setting our sketch can be released with less noise under $(\varepsilon, \delta)$-differential privacy.
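For reference, the underlying Misra-Gries summary is short to state. The noisy release below is only a generic placeholder (per-counter Laplace noise plus a threshold) to show where privacy noise enters; it is not the paper's mechanism, whose noise magnitude is independent of the sketch size.

```python
# Misra-Gries summary with k counters, plus a placeholder noisy release.
import math
import random

def misra_gries(stream, k):
    counters = {}
    for x in stream:
        if x in counters:
            counters[x] += 1
        elif len(counters) < k:
            counters[x] = 1
        else:
            for key in list(counters):        # decrement all counters
                counters[key] -= 1
                if counters[key] == 0:
                    del counters[key]
    return counters

def laplace(scale):
    # standard inverse-CDF Laplace sampler
    u = random.random() - 0.5
    return -scale * math.copysign(1.0, u) * math.log(1 - 2 * abs(u))

def noisy_release(counters, eps, threshold):
    # placeholder DP-style release: perturb each counter, suppress small ones
    released = {}
    for key, count in counters.items():
        noisy = count + laplace(1.0 / eps)
        if noisy > threshold:
            released[key] = noisy
    return released

sketch = misra_gries(["a", "b", "a", "c", "a", "b", "d", "a"], k=3)
print(noisy_release(sketch, eps=1.0, threshold=2.0))
```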
[7] arXiv:2312.11356 (replaced) [pdf, html, other]
Title: The Problem of Coherence in Natural Language Explanations of Recommendations
Authors: Jakub Raczyński, Mateusz Lango, Jerzy Stefanowski
Comments: ECAI 2023
Subjects: Computation and Language (cs.CL); Artificial Intelligence (cs.AI); Information Retrieval (cs.IR); Machine Learning (cs.LG)

Providing natural language explanations for recommendations is particularly useful from the perspective of a non-expert user. Although several methods for providing such explanations have recently been proposed, we argue that an important aspect of explanation quality has been overlooked in their experimental evaluation. Specifically, the coherence between the generated text and the predicted rating, which is a necessary condition for an explanation to be useful, is not properly captured by currently used evaluation measures. In this paper, we highlight the issue of explanation-prediction coherence by 1) presenting results from a manual verification of explanations generated by one of the state-of-the-art approaches, 2) proposing a method for automatic coherence evaluation, 3) introducing a new transformer-based method that aims to produce more coherent explanations than the state-of-the-art approaches, and 4) performing an experimental evaluation which demonstrates that this method significantly improves explanation coherence without affecting the other aspects of recommendation performance.
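The coherence issue can be caricatured with a tiny sketch: infer a rating from the generated explanation with some text scorer and compare it with the recommender's predicted rating. The keyword scorer below is a stand-in (a sentiment model would be used in practice), not the paper's evaluation method.

```python
# Toy coherence check between a predicted rating and the explanation text.
def rating_from_text(explanation):
    positive = {"great", "love", "excellent", "perfect", "tasty"}
    negative = {"bad", "bland", "poor", "disappointing", "awful"}
    words = set(explanation.lower().split())
    score = 3.0 + len(words & positive) - len(words & negative)
    return min(5.0, max(1.0, score))

def is_coherent(predicted_rating, explanation, tolerance=1.0):
    # coherent if the text-implied rating agrees with the model's rating
    return abs(predicted_rating - rating_from_text(explanation)) <= tolerance

print(is_coherent(4.5, "Great pizza, you will love the crust"))   # True
print(is_coherent(4.5, "Bland and disappointing service"))        # False: incoherent pair
```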
[8] arXiv:2405.18770 (replaced) [pdf, html, other]
Title: Multimodal Adversarial Defense for Vision-Language Models by Leveraging One-To-Many Relationships
Authors: Futa Waseda, Antonio Tejero-de-Pablos, Isao Echizen
Comments: Under review
Subjects: Computer Vision and Pattern Recognition (cs.CV); Artificial Intelligence (cs.AI); Information Retrieval (cs.IR)

Pre-trained vision-language (VL) models are highly vulnerable to adversarial attacks. However, existing defense methods primarily focus on image classification, overlooking two key aspects of VL tasks: multimodal attacks, where both image and text can be perturbed, and the one-to-many relationship of images and texts, where a single image can correspond to multiple textual descriptions and vice versa (1:N and N:1). This work is the first to explore defense strategies against multimodal attacks in VL tasks, whereas prior VL defense methods focus on vision robustness. We propose multimodal adversarial training (MAT), which incorporates adversarial perturbations in both image and text modalities during training, significantly outperforming existing unimodal defenses. Furthermore, we discover that MAT is limited by deterministic one-to-one (1:1) image-text pairs in VL training data. To address this, we conduct a comprehensive study on leveraging one-to-many relationships to enhance robustness, investigating diverse augmentation techniques. Our analysis shows that, for a more effective defense, augmented image-text pairs should be well-aligned, diverse, yet avoid distribution shift -- conditions overlooked by prior research. Our experiments show that MAT can effectively be applied to different VL models and tasks to improve adversarial robustness, outperforming previous efforts. Our code will be made public upon acceptance.
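A hedged sketch of a multimodal adversarial training step in this spirit: perturb the image with PGD-style steps and the text in embedding space, then compute the training loss on the perturbed pair. The tiny linear encoders are stand-ins for CLIP-like encoders, and this is the general recipe rather than the paper's exact MAT procedure.

```python
# Sketch of a multimodal adversarial training (MAT-style) step.
import torch
import torch.nn.functional as F

img_enc = torch.nn.Linear(512, 128)   # stand-ins for vision/text encoders
txt_enc = torch.nn.Linear(300, 128)

def clip_loss(img_feat, txt_feat):
    # symmetric-contrastive-style loss on the in-batch similarity matrix
    logits = F.normalize(img_feat, dim=-1) @ F.normalize(txt_feat, dim=-1).T
    labels = torch.arange(logits.size(0))
    return F.cross_entropy(logits / 0.07, labels)

def mat_step(images, text_emb, eps_img=0.03, eps_txt=0.1, alpha=0.01, steps=3):
    adv_img, adv_txt = images.clone().detach(), text_emb.clone().detach()
    for _ in range(steps):
        adv_img.requires_grad_(True)
        adv_txt.requires_grad_(True)
        loss = clip_loss(img_enc(adv_img), txt_enc(adv_txt))
        g_img, g_txt = torch.autograd.grad(loss, [adv_img, adv_txt])
        # ascend the loss, then project back into the epsilon-ball around the clean inputs
        adv_img = torch.max(torch.min(adv_img + alpha * g_img.sign(), images + eps_img),
                            images - eps_img).detach()
        adv_txt = torch.max(torch.min(adv_txt + alpha * g_txt.sign(), text_emb + eps_txt),
                            text_emb - eps_txt).detach()
    return clip_loss(img_enc(adv_img), txt_enc(adv_txt))   # training loss on perturbed pair

loss = mat_step(torch.randn(4, 512), torch.randn(4, 300))
loss.backward()   # gradients flow into the encoders, not the perturbations
```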
[9] arXiv:2406.09188 (replaced) [pdf, html, other]
Title: An Efficient Post-hoc Framework for Reducing Task Discrepancy of Text Encoders for Composed Image Retrieval
Authors: Jaeseok Byun, Seokhyeon Jeong, Wonjae Kim, Sanghyuk Chun, Taesup Moon
Comments: 22 pages
Subjects: Computer Vision and Pattern Recognition (cs.CV); Information Retrieval (cs.IR)

Composed Image Retrieval (CIR) aims to retrieve a target image based on a reference image and conditioning text, enabling controllable image searches. Mainstream Zero-Shot (ZS) CIR methods bypass the need for expensive CIR training triplets by projecting image embeddings into the text token embedding space, forming a composed query for retrieval. However, we highlight an inherent limitation of these projection-based CIR methods: a task discrepancy of the text encoder between its original pre-training task (text $\leftrightarrow$ image) and the target CIR task (image + text $\leftrightarrow$ image), which potentially negatively impacts CIR performance. To reduce this discrepancy, a naive solution would be to train both image and text encoders with CIR triplets in a supervised manner. Instead, we introduce Reducing Task Discrepancy of Text Encoders (RTD), an efficient text-only post-hoc framework that complements projection-based CIR methods. We devise a novel target-anchored text contrastive learning objective designed to enhance the capability of the text encoder for CIR. We also propose two key enhancements: (1) a hard negative-based refined batch sampling strategy and (2) a refined concatenation scheme to further mitigate the training-inference discrepancy. Integrating RTD into state-of-the-art projection-based methods achieves performance comparable to, or even surpassing, resource-intensive state-of-the-art synthetic CIR triplet-based approaches with only 23 minutes of additional training on 4 A100 GPUs (up to $100\times$ faster in training). Our code will be available upon acceptance.
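The target-anchored contrastive objective can be sketched as a standard in-batch InfoNCE loss that pulls the composed-text feature (reference caption plus modification text) toward its target-caption feature. The pairing and feature shapes are assumptions for illustration, not the RTD training code.

```python
# Sketch of a target-anchored text contrastive loss.
import torch
import torch.nn.functional as F

def target_anchored_contrastive(composed_feat, target_feat, temperature=0.07):
    # composed_feat, target_feat: (B, d) text features from the text encoder
    c = F.normalize(composed_feat, dim=-1)
    t = F.normalize(target_feat, dim=-1)
    logits = c @ t.T / temperature               # (B, B) similarity matrix
    labels = torch.arange(c.size(0))             # i-th composed text matches i-th target
    return F.cross_entropy(logits, labels)

loss = target_anchored_contrastive(torch.randn(8, 256), torch.randn(8, 256))
```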
[10] arXiv:2501.00513 (replaced) [pdf, other]
Title: CaReBench: A Fine-Grained Benchmark for Video Captioning and Retrieval
Authors: Yifan Xu, Xinhao Li, Yichun Yang, Desen Meng, Rui Huang, Limin Wang
Subjects: Computer Vision and Pattern Recognition (cs.CV); Information Retrieval (cs.IR); Machine Learning (cs.LG)

Video understanding, including video captioning and retrieval, is still a great challenge for video-language models (VLMs). Existing video retrieval and captioning benchmarks only include short descriptions, which limits their ability to evaluate detailed video understanding. To address this problem, we present CaReBench, a testing benchmark for fine-grained video captioning and retrieval with 1,000 high-quality pairs of videos and human-annotated detailed captions. Uniquely, it provides manually separated spatial and temporal annotations for each video. Based on this design, we introduce two evaluation metrics, ReBias and CapST, specifically tailored for video retrieval and video captioning tasks, respectively. These metrics enable a comprehensive investigation into the spatial and temporal biases inherent in VLMs. In addition, to handle both video retrieval and video captioning tasks in a unified framework, we develop a simple baseline based on a Multimodal Large Language Model (MLLM). By implementing a two-stage Supervised Fine-Tuning (SFT) process, we fully unlock the potential of the MLLM, enabling it not only to generate detailed video descriptions but also to extract video features. Surprisingly, experimental results demonstrate that, compared to the CLIP-based models designed for retrieval and the popular MLLMs skilled in video captioning, our baseline shows competitive performance in both fine-grained video retrieval and detailed video captioning.

[11] arXiv:2501.09292 (replaced) [pdf, html, other]
Title: To Retrieve or Not to Retrieve? Uncertainty Detection for Dynamic Retrieval Augmented Generation
Authors: Kaustubh D. Dhole
Comments: 1st workshop on "Quantify Uncertainty and Hallucination in Foundation Models: The Next Frontier in Reliable AI" at ICLR 2025
Subjects: Computation and Language (cs.CL); Artificial Intelligence (cs.AI); Information Retrieval (cs.IR)

Retrieval-Augmented Generation equips large language models with the capability to retrieve external knowledge, thereby mitigating hallucinations by incorporating information beyond the model's intrinsic abilities. However, most prior work has focused on invoking retrieval deterministically, which makes it unsuitable for tasks such as long-form question answering. Instead, dynamically performing retrieval by invoking it only when the underlying LLM lacks the required knowledge can be more efficient. In this context, we delve deeper into the question "To Retrieve or Not to Retrieve?" by exploring multiple uncertainty detection methods. We evaluate these methods for the task of long-form question answering, employing dynamic retrieval, and present our comparisons. Our findings suggest that uncertainty detection metrics, such as Degree Matrix Jaccard and Eccentricity, can reduce the number of retrieval calls by almost half, with only a slight reduction in question-answering accuracy.
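A minimal sketch of uncertainty-gated retrieval in this spirit: sample several draft answers, measure how much they agree via pairwise Jaccard similarity, and call the retriever only when the drafts disagree. The exact Degree Matrix Jaccard and Eccentricity formulations in the paper differ in detail, and `sample_llm` / `retrieve_and_answer` are placeholders.

```python
# Sketch of dynamic retrieval gated by a degree-based uncertainty score.
import numpy as np

def jaccard(a, b):
    a, b = set(a.lower().split()), set(b.lower().split())
    return len(a & b) / max(1, len(a | b))

def degree_uncertainty(samples):
    m = len(samples)
    S = np.array([[jaccard(x, y) for y in samples] for x in samples])
    mean_degree = (S.sum() - m) / (m * (m - 1))   # average off-diagonal similarity
    return 1.0 - mean_degree                      # high when the drafts disagree

def answer(question, sample_llm, retrieve_and_answer, threshold=0.5, m=5):
    drafts = [sample_llm(question) for _ in range(m)]
    if degree_uncertainty(drafts) > threshold:
        return retrieve_and_answer(question)      # model seems unsure: retrieve
    return drafts[0]                              # model seems confident: skip retrieval
```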
[12] arXiv:2503.05592 (replaced) [pdf, html, other]
Title: R1-Searcher: Incentivizing the Search Capability in LLMs via Reinforcement Learning
Authors: Huatong Song, Jinhao Jiang, Yingqian Min, Jie Chen, Zhipeng Chen, Wayne Xin Zhao, Lei Fang, Ji-Rong Wen
Subjects: Artificial Intelligence (cs.AI); Computation and Language (cs.CL); Information Retrieval (cs.IR)

Existing Large Reasoning Models (LRMs) have shown the potential of reinforcement learning (RL) to enhance the complex reasoning capabilities of Large Language Models (LLMs).
While they achieve remarkable performance on challenging tasks such as mathematics and coding, they often rely on their internal knowledge to solve problems, which can be inadequate for time-sensitive or knowledge-intensive questions, leading to inaccuracies and hallucinations. To address this, we propose R1-Searcher, a novel two-stage outcome-based RL approach designed to enhance the search capabilities of LLMs. This method allows LLMs to autonomously invoke external search systems to access additional knowledge during the reasoning process. Our framework relies exclusively on RL, without requiring process rewards or distillation for a cold start, generalizing effectively to out-of-domain datasets and supporting both Base and Instruct models. Our experiments demonstrate that our method significantly outperforms previous strong RAG methods, even when compared to the closed-source GPT-4o-mini.
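The search-in-the-loop behavior can be sketched with placeholder generate/search functions (not part of the R1-Searcher release): decoding pauses when the model emits a search query, external documents are fetched, and generation resumes with the retrieved evidence appended.

```python
# Hedged sketch of search-augmented reasoning: the model may emit
# <search>query</search>; when it does, an external retriever is called and the
# results are fed back before decoding continues. `generate_until` is assumed to
# stop just before any of the given stop strings; both functions are placeholders.
def reason_with_search(question, generate_until, search, max_rounds=4):
    context = (
        f"Question: {question}\n"
        "Think step by step. Use <search>query</search> when external facts are needed.\n"
    )
    for _ in range(max_rounds):
        chunk = generate_until(context, stop=["</search>", "<answer>"])
        context += chunk
        if "<search>" in chunk:
            query = chunk.split("<search>")[-1].strip()
            docs = search(query, k=3)
            context += "</search>\n<documents>\n" + "\n".join(docs) + "\n</documents>\n"
        else:
            break   # the model started answering instead of searching
    return context
```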
<p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> <!-- End Macro-Column 2 --> </div> </footer> </div> <script src="/static/base/1.0.1/js/member_acknowledgement.js"></script> </body> </html>