CINXE.COM
Databases
<!DOCTYPE html> <html lang="en"> <head> <title>Databases </title> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="apple-touch-icon" sizes="180x180" href="/static/browse/0.3.4/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="/static/browse/0.3.4/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="/static/browse/0.3.4/images/icons/favicon-16x16.png"> <link rel="manifest" href="/static/browse/0.3.4/images/icons/site.webmanifest"> <link rel="mask-icon" href="/static/browse/0.3.4/images/icons/safari-pinned-tab.svg" color="#5bbad5"> <meta name="msapplication-TileColor" content="#da532c"> <meta name="theme-color" content="#ffffff"> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/arXiv.css?v=20241206" /> <link rel="stylesheet" type="text/css" media="print" href="/static/browse/0.3.4/css/arXiv-print.css?v=20200611" /> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/browse_search.css" /> <script language="javascript" src="/static/browse/0.3.4/js/accordion.js"></script> <script src="/static/browse/0.3.4/js/mathjaxToggle.min.js" type="text/javascript"></script> <script type="text/javascript" language="javascript">mathjaxToggle();</script> </head> <body class="with-cu-identity"> <div class="flex-wrap-footer"> <header> <a href="#content" class="is-sr-only">Skip to main content</a> <!-- start desktop header --> <div class="columns is-vcentered is-hidden-mobile" id="cu-identity"> <div class="column" id="cu-logo"> <a href="https://www.cornell.edu/"><img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University" /></a> </div><div class="column" id="support-ack"> <span id="support-ack-url">We gratefully acknowledge support from the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.</span> <a 
href="https://info.arxiv.org/about/donate.html" class="btn-header-donate">Donate</a> </div> </div> <div id="header" class="is-hidden-mobile"> <a aria-hidden="true" tabindex="-1" href="/IgnoreMe"></a> <div class="header-breadcrumbs"> <a href="/"><img src="/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg" alt="arXiv logo" style="height:40px;"/></a> <span>&gt;</span> <a href="/list/cs.DB/recent">cs.DB</a> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div><!-- /end desktop header --> <div class="mobile-header"> <div class="columns is-mobile"> <div class="column logo-arxiv"><a href="https://arxiv.org/"><img 
src="/static/browse/0.3.4/images/arxiv-logomark-small-white.svg" alt="arXiv logo" style="height:60px;" /></a></div> <div class="column logo-cornell"><a href="https://www.cornell.edu/"> <picture> <source media="(min-width: 501px)" srcset="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg 400w" sizes="400w" /> <source srcset="/static/browse/0.3.4/images/icons/cu/cornell_seal_simple_black.svg 2x" /> <img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University Logo" /> </picture> </a></div> <div class="column nav" id="toggle-container" role="menubar"> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-white"><title>open search</title><path d="M505 442.7L405.3 343c-4.5-4.5-10.6-7-17-7H372c27.6-35.3 44-79.7 44-128C416 93.1 322.9 0 208 0S0 93.1 0 208s93.1 208 208 208c48.3 0 92.7-16.4 128-44v16.3c0 6.4 2.5 12.5 7 17l99.7 99.7c9.4 9.4 24.6 9.4 33.9 0l28.3-28.3c9.4-9.4 9.4-24.6.1-34zM208 336c-70.7 0-128-57.2-128-128 0-70.7 57.2-128 128-128 70.7 0 128 57.2 128 128 0 70.7-57.2 128-128 128z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <form class="mobile-search-form" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <input class="input" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div><!-- /end mobile-header --> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Databases</h1> <ul> <li><a href="#item1">New submissions</a></li> <li><a href="#item7">Cross-lists</a></li> <li><a href="#item8">Replacements</a></li> </ul> <p>See <a id="recent-cs.DB" aria-labelledby="recent-cs.DB" href="/list/cs.DB/recent">recent</a> articles</p> <h3>Showing new listings for Wednesday, 19 March 2025</h3> <div class='paging'>Total of 10 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/cs.DB/new?skip=0&amp;show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> <dl id='articles'> <h3>New submissions (showing 6 of 6 entries)</h3> <dt> <a name='item1'>[1]</a> <a href ="/abs/2503.13461" title="Abstract" id="2503.13461"> arXiv:2503.13461 </a> [<a 
href="/pdf/2503.13461" title="Download PDF" id="pdf-2503.13461" aria-labelledby="pdf-2503.13461">pdf</a>, <a href="/format/2503.13461" title="Other formats" id="oth-2503.13461" aria-labelledby="oth-2503.13461">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CARDS: A collection of package, revision, and miscellaneous dependency graphs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Tran-Girard,+E">Euxane Tran-Girard</a> (LIGM, CNRS), <a href="https://arxiv.org/search/cs?searchtype=author&query=Bulteau,+L">Laurent Bulteau</a> (LIGM, CNRS), <a href="https://arxiv.org/search/cs?searchtype=author&query=David,+P">Pierre-Yves David</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span>; Digital Libraries (cs.DL); Social and Information Networks (cs.SI) </div> <p class='mathjax'> CARDS (Corpus of Acyclic Repositories and Dependency Systems) is a collection of directed graphs which express dependency relations, extracted from diverse real-world sources such as package managers, version control systems, and event graphs. Each graph contains anywhere from thousands to hundreds of millions of nodes and edges, which are normalized into a simple, unified format. Both cyclic and acyclic variants are included (as some graphs, such as citation networks, are not entirely acyclic). The dataset is suitable for studying the structure of different kinds of dependencies, enabling the characterization and distinction of various dependency graph types. It has been utilized for developing and testing efficient algorithms which leverage the specificities of source version control graphs. The collection is publicly available at <a href="http://doi.org/10.5281/zenodo.14245890" rel="external noopener nofollow" class="link-external link-http">this http URL</a>. 
</p> </div> </dd> <dt> <a name='item2'>[2]</a> <a href ="/abs/2503.13502" title="Abstract" id="2503.13502"> arXiv:2503.13502 </a> [<a href="/pdf/2503.13502" title="Download PDF" id="pdf-2503.13502" aria-labelledby="pdf-2503.13502">pdf</a>, <a href="https://arxiv.org/html/2503.13502v1" title="View HTML" id="html-2503.13502" aria-labelledby="html-2503.13502" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13502" title="Other formats" id="oth-2503.13502" aria-labelledby="oth-2503.13502">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Foundation Models for Spatio-Temporal Data Science: A Tutorial and Survey </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Liang,+Y">Yuxuan Liang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+H">Haomin Wen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xia,+Y">Yutong Xia</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+M">Ming Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yang,+B">Bin Yang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Salim,+F">Flora Salim</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wen,+Q">Qingsong Wen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+S">Shirui Pan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cong,+G">Gao Cong</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Spatio-Temporal (ST) data science, which includes sensing, managing, and mining large-scale data across space and time, is fundamental to understanding complex systems in domains such as urban computing, climate science, and intelligent transportation. 
Traditional deep learning approaches have significantly advanced this field, particularly in the stage of ST data mining. However, these models remain task-specific and often require extensive labeled data. Inspired by the success of Foundation Models (FM), especially large language models, researchers have begun exploring the concept of Spatio-Temporal Foundation Models (STFMs) to enhance adaptability and generalization across diverse ST tasks. Unlike prior architectures, STFMs empower the entire workflow of ST data science, ranging from data sensing, management, to mining, thereby offering a more holistic and scalable approach. Despite rapid progress, a systematic study of STFMs for ST data science remains lacking. This survey aims to provide a comprehensive review of STFMs, categorizing existing methodologies and identifying key research directions to advance ST general intelligence. </p> </div> </dd> <dt> <a name='item3'>[3]</a> <a href ="/abs/2503.13521" title="Abstract" id="2503.13521"> arXiv:2503.13521 </a> [<a href="/pdf/2503.13521" title="Download PDF" id="pdf-2503.13521" aria-labelledby="pdf-2503.13521">pdf</a>, <a href="https://arxiv.org/html/2503.13521v1" title="View HTML" id="html-2503.13521" aria-labelledby="html-2503.13521" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13521" title="Other formats" id="oth-2503.13521" aria-labelledby="oth-2503.13521">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> States of Disarray: Cleaning Data for Gerrymandering Analysis </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Agarwal,+A">Ananya Agarwal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Alusi,+F">Fnu Alusi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hsu,+A">Arbie Hsu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Syraj,+A">Arif Syraj</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Veomett,+E">Ellen Veomett</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 12 pages, 3 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span>; Computers and Society (cs.CY); Physics and Society (physics.soc-ph) </div> <p class='mathjax'> The mathematics of redistricting is an area of study that has exploded in recent years. In particular, many different research groups and expert witnesses in court cases have used outlier analysis to argue that a proposed map is a gerrymander. This outlier analysis relies on having an ensemble of potential redistricting maps against which the proposed map is compared. Arguably the most widely-accepted method of creating such an ensemble is to use a Markov Chain Monte Carlo (MCMC) process. This process requires that various pieces of data be gathered, cleaned, and coalesced into a single file that can be used as the seed of the MCMC process. <br>In this article, we describe how we have begun this cleaning process for each state, and made the resulting data available for the public at <a href="https://github.com/eveomett-states" rel="external noopener nofollow" class="link-external link-https">this https URL</a> . At the time of submission, we have data for 22 states available for researchers, students, and the general public to easily access and analyze. We will continue the data cleaning process for each state, and we hope that the availability of these datasets will both further research in this area, and increase the public's interest in and understanding of modern techniques to detect gerrymandering. 
</p> </div> </dd> <dt> <a name='item4'>[4]</a> <a href ="/abs/2503.13822" title="Abstract" id="2503.13822"> arXiv:2503.13822 </a> [<a href="/pdf/2503.13822" title="Download PDF" id="pdf-2503.13822" aria-labelledby="pdf-2503.13822">pdf</a>, <a href="https://arxiv.org/html/2503.13822v1" title="View HTML" id="html-2503.13822" aria-labelledby="html-2503.13822" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13822" title="Other formats" id="oth-2503.13822" aria-labelledby="oth-2503.13822">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> NeurBench: Benchmarking Learned Database Components with Data and Workload Drift Modeling </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+Z">Zhanhao Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+G">Gang Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gao,+H">Haotian Gao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rigger,+M">Manuel Rigger</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ooi,+B+C">Beng Chin Ooi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xing,+N">Naili Xing</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zeng,+L">Lingze Zeng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+M">Meihui Zhang</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span> </div> <p class='mathjax'> Learned database components, which deeply integrate machine learning into their design, have been extensively studied in recent years. Given the dynamism of databases, where data and workloads continuously drift, it is crucial for learned database components to remain effective and efficient in the face of data and workload drift. 
Adaptability, therefore, is a key factor in assessing their practical applicability. However, existing benchmarks for learned database components either overlook or oversimplify the treatment of data and workload drift, failing to evaluate learned database components across a broad range of drift scenarios. This paper presents NeurBench, a new benchmark suite that applies measurable and controllable data and workload drift to enable systematic performance evaluations of learned database components. We quantify diverse types of drift by introducing a key concept called the drift factor. Building on this formulation, we propose a drift-aware data and workload generation framework that effectively simulates real-world drift while preserving inherent correlations. We employ NeurBench to evaluate state-of-the-art learned query optimizers, learned indexes, and learned concurrency control within a consistent experimental process, providing insights into their performance under diverse data and workload drift scenarios. 
</p> </div> </dd> <dt> <a name='item5'>[5]</a> <a href ="/abs/2503.14195" title="Abstract" id="2503.14195"> arXiv:2503.14195 </a> [<a href="/pdf/2503.14195" title="Download PDF" id="pdf-2503.14195" aria-labelledby="pdf-2503.14195">pdf</a>, <a href="https://arxiv.org/html/2503.14195v1" title="View HTML" id="html-2503.14195" aria-labelledby="html-2503.14195" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14195" title="Other formats" id="oth-2503.14195" aria-labelledby="oth-2503.14195">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Mapping Urban Villages in China: Progress and Challenges </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+R">Rui Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Tu,+W">Wei Tu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+D">Dongsheng Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+W">Wenyu Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Updated review at <a href="https://github.com/rui-research/urban-village-review" rel="external noopener nofollow" class="link-external link-https">this https URL</a> </div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> Cao, R., Tu, W., Chen, D., & Zhang, W. (2025). Mapping urban villages in China: Progress and challenges. Computers, Environment and Urban Systems, 119, 102282 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span>; Computer Vision and Pattern Recognition (cs.CV) </div> <p class='mathjax'> The shift toward high-quality urbanization has brought increased attention to the issue of "urban villages", which has become a prominent social problem in China. 
However, there is a lack of available geospatial data on urban villages, making it crucial to prioritize urban village mapping. In order to assess the current progress in urban village mapping and identify challenges and future directions, we have conducted a comprehensive review, which to the best of our knowledge is the first of its kind in this field. Our review begins by providing a clear context for urban villages and elaborating the method for literature review, then summarizes the study areas, data sources, and approaches used for urban village mapping in China. We also address the challenges and future directions for further research. Through thorough investigation, we find that current studies only cover very limited study areas and periods and lack sufficient investigation into the scalability, transferability, and interpretability of identification approaches due to the challenges in concept fuzziness and variances, spatial heterogeneity and variances of urban villages, and data availability. Future research can complement and further the current research in the following potential directions in order to achieve large-area mapping across the whole nation... 
</p> </div> </dd> <dt> <a name='item6'>[6]</a> <a href ="/abs/2503.14469" title="Abstract" id="2503.14469"> arXiv:2503.14469 </a> [<a href="/pdf/2503.14469" title="Download PDF" id="pdf-2503.14469" aria-labelledby="pdf-2503.14469">pdf</a>, <a href="/format/2503.14469" title="Other formats" id="oth-2503.14469" aria-labelledby="oth-2503.14469">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Attribution Score Alignment in Explainable Data Management </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Azua,+F">Felipe Azua</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Bertossi,+L">Leopoldo Bertossi</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span>; Artificial Intelligence (cs.AI) </div> <p class='mathjax'> Different attribution-scores have been proposed to quantify the relevance of database tuples for a query answer from a database. Among them, we find Causal Responsibility, the Shapley Value, the Banzhaf Power-Index, and the Causal Effect. They have been analyzed in isolation, mainly in terms of computational properties. In this work, we start an investigation into the alignment of these scores on the basis of the queries at hand; that is, on whether they induce compatible rankings of tuples. We are able to identify vast classes of queries for which some pairs of scores are always aligned, and others for which they are not. It turns out that the presence of exogenous tuples makes a crucial difference in this regard. 
</p> </div> </dd> </dl> <dl id='articles'> <h3>Cross submissions (showing 1 of 1 entries)</h3> <dt> <a name='item7'>[7]</a> <a href ="/abs/2503.13708" title="Abstract" id="2503.13708"> arXiv:2503.13708 </a> (cross-list from cs.AI) [<a href="/pdf/2503.13708" title="Download PDF" id="pdf-2503.13708" aria-labelledby="pdf-2503.13708">pdf</a>, <a href="https://arxiv.org/html/2503.13708v1" title="View HTML" id="html-2503.13708" aria-labelledby="html-2503.13708" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13708" title="Other formats" id="oth-2503.13708" aria-labelledby="oth-2503.13708">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Circular Construction Product Ontology for End-of-Life Decision-Making </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Adu-Duodu,+K">Kwabena Adu-Duodu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wilson,+S">Stanly Wilson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yinhao Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Oladimeji,+A">Aanuoluwapo Oladimeji</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Huraysi,+T">Talea Huraysi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Barati,+M">Masoud Barati</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Perera,+C">Charith Perera</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Solaiman,+E">Ellis Solaiman</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Rana,+O">Omer Rana</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ranjan,+R">Rajiv Ranjan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Shah,+T">Tejal Shah</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Databases (cs.DB) 
</div> <p class='mathjax'> Efficient management of end-of-life (EoL) products is critical for advancing circularity in supply chains, particularly within the construction industry where EoL strategies are hindered by heterogenous lifecycle data and data silos. Current tools like Environmental Product Declarations (EPDs) and Digital Product Passports (DPPs) are limited by their dependency on seamless data integration and interoperability which remain significant challenges. To address these, we present the Circular Construction Product Ontology (CCPO), an applied framework designed to overcome semantic and data heterogeneity challenges in EoL decision-making for construction products. CCPO standardises vocabulary and facilitates data integration across supply chain stakeholders enabling lifecycle assessments (LCA) and robust decision-making. By aggregating disparate data into a unified product provenance, CCPO enables automated EoL recommendations through customisable SWRL rules aligned with European standards and stakeholder-specific circularity SLAs, demonstrating its scalability and integration capabilities. The adopted circular product scenario depicts CCPO's application while competency question evaluations show its superior performance in generating accurate EoL suggestions highlighting its potential to greatly improve decision-making in circular supply chains and its applicability in real-world construction environments. 
</p> </div> </dd> </dl> <dl id='articles'> <h3>Replacement submissions (showing 3 of 3 entries)</h3> <dt> <a name='item8'>[8]</a> <a href ="/abs/2502.12918" title="Abstract" id="2502.12918"> arXiv:2502.12918 </a> (replaced) [<a href="/pdf/2502.12918" title="Download PDF" id="pdf-2502.12918" aria-labelledby="pdf-2502.12918">pdf</a>, <a href="https://arxiv.org/html/2502.12918v2" title="View HTML" id="html-2502.12918" aria-labelledby="html-2502.12918" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.12918" title="Other formats" id="oth-2502.12918" aria-labelledby="oth-2502.12918">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Query Rewriting via LLMs </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Dharwada,+S">Sriram Dharwada</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Devrani,+H">Himanshu Devrani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Haritsa,+J">Jayant Haritsa</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Doraiswamy,+H">Harish Doraiswamy</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span> </div> <p class='mathjax'> When complex SQL queries suffer slow executions despite query optimization, DBAs typically invoke automated query rewriting tools to recommend &ldquo;lean&rdquo; equivalents that are conducive to faster execution. The rewritings are usually achieved via transformation rules, but these rules are limited in scope and difficult to update in a production system. Recently, LLM-based techniques have also been suggested, but they are prone to semantic and syntactic errors. <br>We investigate here how the remarkable cognitive capabilities of LLMs can be leveraged for performant query rewriting while incorporating safeguards and optimizations to ensure correctness and efficiency. 
Our study shows that these goals can be progressively achieved through incorporation of (a) an ensemble suite of basic prompts, (b) database-sensitive prompts via redundancy removal and selectivity-based rewriting rules, and (c) LLM token probability-guided rewrite paths. Further, a suite of logic-based and statistical tools can be used to check for semantic violations in the rewrites prior to DBA consideration. <br>We have implemented the above LLM-infused techniques in the LITHE system, and evaluated complex analytic queries from standard benchmarks on contemporary database platforms. The results show significant performance improvements for slow queries, with regard to both abstract costing and actual execution, over both SOTA techniques and the native query optimizer. For instance, with TPC-DS on PostgreSQL, the geometric mean of the runtime speedups for slow queries was as high as 18.4 over the native optimizer, whereas SOTA delivered 6 in comparison. <br>Overall, LITHE is a promising step toward viable LLM-based advisory tools for ameliorating enterprise query performance. 
</p> </div> </dd> <dt> <a name='item9'>[9]</a> <a href ="/abs/2503.00402" title="Abstract" id="2503.00402"> arXiv:2503.00402 </a> (replaced) [<a href="/pdf/2503.00402" title="Download PDF" id="pdf-2503.00402" aria-labelledby="pdf-2503.00402">pdf</a>, <a href="https://arxiv.org/html/2503.00402v2" title="View HTML" id="html-2503.00402" aria-labelledby="html-2503.00402" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.00402" title="Other formats" id="oth-2503.00402" aria-labelledby="oth-2503.00402">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> A Topology-Aware Localized Update Strategy for Graph-Based ANN Index </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+S">Song Yu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Lin,+S">Shengyuan Lin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Gong,+S">Shufeng Gong</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xie,+Y">Yongqing Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+R">Ruicheng Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhou,+Y">Yijie Zhou</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Sun,+J">Ji Sun</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yanfeng Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+G">Guoliang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Yu,+G">Ge Yu</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span> </div> <p class='mathjax'> The graph-based index has been widely adopted to meet the demand for approximate nearest neighbor search (ANNS) for high-dimensional vectors. 
However, in dynamic scenarios involving frequent vector insertions and deletions, existing systems improve update throughput by adopting a batch update method. However, a large batch size leads to significant degradation in search accuracy. <br>This work aims to improve the performance of graph-based ANNS systems in small-batch update scenarios, while maintaining high search efficiency and accuracy. We identify two key issues in existing batch update systems for small-batch updates. First, the system needs to scan the entire index file to identify and update the affected vertices, resulting in excessive unnecessary I/O. Second, updating the affected vertices introduces many new neighbors, frequently triggering neighbor pruning. To address these issues, we propose a topology-aware localized update strategy for graph-based ANN index. We introduce a lightweight index topology to identify affected vertices efficiently and employ a localized update strategy that modifies only the affected vertices in the index file. To mitigate frequent heavy neighbor pruning, we propose a similar neighbor replacement strategy, which connects the affected vertices to only a small number (typically one) of the most similar outgoing neighbors of the deleted vertex during repair. Based on extensive experiments on real-world datasets, our update strategy achieves 2.47X-6.45X higher update throughput than the state-of-the-art system FreshDiskANN while maintaining high search efficiency and accuracy. 
</p> </div> </dd> <dt> <a name='item10'>[10]</a> <a href ="/abs/2503.08087" title="Abstract" id="2503.08087"> arXiv:2503.08087 </a> (replaced) [<a href="/pdf/2503.08087" title="Download PDF" id="pdf-2503.08087" aria-labelledby="pdf-2503.08087">pdf</a>, <a href="https://arxiv.org/html/2503.08087v3" title="View HTML" id="html-2503.08087" aria-labelledby="html-2503.08087" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.08087" title="Other formats" id="oth-2503.08087" aria-labelledby="oth-2503.08087">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Resolvi: A Reference Architecture for Extensible, Scalable and Interoperable Entity Resolution </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Olar,+A">Andrei Olar</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span> </div> <p class='mathjax'> Context: Entity resolution (ER) plays a pivotal role in data management by determining whether multiple records correspond to the same real-world entity. Because of its critical importance across domains such as healthcare, finance, and machine learning and its long research history, designing and implementing ER systems remains challenging in practice due to the wide array of methodologies and tools available. This diversity results in a paradox of choice for practitioners, which is further compounded by the various ER variants (record linkage, entity alignment, merge/purge, and so on). <br>Objective: This paper introduces Resolvi, a reference architecture for facilitating the design of ER systems. The goal is to facilitate creating extensible, interoperable and scalable ER systems and to reduce architectural decision-making duration. 
<br>Methods: Software design techniques such as the 4+1 view model or visual communication tools such as UML are used to present the reference architecture in a structured way. Source code analysis and literature review are used to derive the main elements of the reference architecture. <br>Results: This paper identifies generic requirements and architectural qualities of ER systems. It provides design guidelines, patterns, and recommendations for creating extensible, scalable, and interoperable ER systems. Furthermore, it highlights implementation best practices and deployment strategies based on insights from existing systems. <br>Conclusion: The proposed reference architecture offers a foundational blueprint for researchers and practitioners in developing extensible, interoperable, and scalable ER systems. Resolvi provides clear abstractions and design recommendations which simplify architecture decision making, whether designing new ER systems or improving existing designs. </p> </div> </dd> </dl> <div class='paging'>Total of 10 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/cs.DB/new?skip=0&amp;show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> </div> </div> </div> </main> <footer style="clear: both;"> <div class="columns is-desktop" role="navigation" aria-label="Secondary" style="margin: -0.75em -0.75em 0.75em -0.75em"> <!-- Macro-Column 1 --> <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path 
d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- End Macro-Column 1 --> <!-- Macro-Column 2 --> <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 
103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 
47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- End Macro-Column 2 --> </div> </footer> </div> <script src="/static/base/1.0.1/js/member_acknowledgement.js"></script> </body> </html>