Databases

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"> <title>Databases </title> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="apple-touch-icon" sizes="180x180" href="/static/browse/0.3.4/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="/static/browse/0.3.4/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="/static/browse/0.3.4/images/icons/favicon-16x16.png"> <link rel="manifest" href="/static/browse/0.3.4/images/icons/site.webmanifest"> <link rel="mask-icon" href="/static/browse/0.3.4/images/icons/safari-pinned-tab.svg" color="#5bbad5"> <meta name="msapplication-TileColor" content="#da532c"> <meta name="theme-color" content="#ffffff"> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/arXiv.css?v=20241206" /> <link rel="stylesheet" type="text/css" media="print" href="/static/browse/0.3.4/css/arXiv-print.css?v=20200611" /> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/browse_search.css" /> <script src="/static/browse/0.3.4/js/accordion.js"></script> <script src="/static/browse/0.3.4/js/mathjaxToggle.min.js" type="text/javascript"></script> <script type="text/javascript">mathjaxToggle();</script> </head> <body class="with-cu-identity"> <div class="flex-wrap-footer"> <header> <a href="#content" class="is-sr-only">Skip to main content</a>  <div class="columns is-vcentered is-hidden-mobile" id="cu-identity"> <div class="column" id="cu-logo"> <a href="https://www.cornell.edu/"><img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University" /></a> </div><div class="column" id="support-ack"> <span id="support-ack-url">We gratefully acknowledge support from the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.</span> <a 
href="https://info.arxiv.org/about/donate.html" class="btn-header-donate">Donate</a> </div> </div> <div id="header" class="is-hidden-mobile"> <a aria-hidden="true" tabindex="-1" href="/IgnoreMe"></a> <div class="header-breadcrumbs"> <a href="/"><img src="/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" style="height:40px;"/></a> <span>></span> <a href="/list/cs.DB/recent">cs.DB</a> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <div class="mobile-header"> <div class="columns is-mobile"> <div class="column logo-arxiv"><a href="https://arxiv.org/"><img 
src="/static/browse/0.3.4/images/arxiv-logomark-small-white.svg" alt="arXiv logo" style="height:60px;" /></a></div> <div class="column logo-cornell"><a href="https://www.cornell.edu/"> <picture> <source media="(min-width: 501px)" srcset="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg 400w" sizes="400w" /> <source srcset="/static/browse/0.3.4/images/icons/cu/cornell_seal_simple_black.svg 2x" /> <img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University Logo" /> </picture> </a></div> <div class="column nav" id="toggle-container" role="menubar"> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-white"><title>open search</title><path d="M505 442.7L405.3 343c-4.5-4.5-10.6-7-17-7H372c27.6-35.3 44-79.7 44-128C416 93.1 322.9 0 208 0S0 93.1 0 208s93.1 208 208 208c48.3 0 92.7-16.4 128-44v16.3c0 6.4 2.5 12.5 7 17l99.7 99.7c9.4 9.4 24.6 9.4 33.9 0l28.3-28.3c9.4-9.4 9.4-24.6.1-34zM208 336c-70.7 0-128-57.2-128-128 0-70.7 57.2-128 128-128 70.7 0 128 57.2 128 128 0 70.7-57.2 128-128 128z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <form class="mobile-search-form" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <input class="input" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Databases</h1> <ul> <li><a href="#item1">New submissions</a></li> <li><a href="#item3">Cross-lists</a></li> <li><a href="#item4">Replacements</a></li> </ul> <p>See <a id="recent-cs.DB" aria-labelledby="recent-cs.DB" href="/list/cs.DB/recent">recent</a> articles</p> <h3>Showing new listings for Thursday, 20 March 2025</h3> <div class='paging'>Total of 7 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/cs.DB/new?skip=0&amp;show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> <dl id='articles'> <h3>New submissions (showing 2 of 2 entries)</h3> <dt> <a name='item1'>[1]</a> <a href ="/abs/2503.14929" title="Abstract" id="2503.14929"> arXiv:2503.14929 </a> [<a href="/pdf/2503.14929" title="Download 
PDF" id="pdf-2503.14929" aria-labelledby="pdf-2503.14929">pdf</a>, <a href="https://arxiv.org/html/2503.14929v1" title="View HTML" id="html-2503.14929" aria-labelledby="html-2503.14929" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.14929" title="Other formats" id="oth-2503.14929" aria-labelledby="oth-2503.14929">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ACE: A Cardinality Estimator for Set-Valued Queries </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Sheng,+Y">Yufan Sheng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cao,+X">Xin Cao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+K">Kaiqi Zhao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Fang,+Y">Yixiang Fang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Qi,+J">Jianzhong Qi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+W">Wenjie Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jensen,+C+S">Christian S. Jensen</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> This paper has been accepted by PVLDB Vol 18 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Cardinality estimation is a fundamental functionality in database systems. Most existing cardinality estimators focus on handling predicates over numeric or categorical data. They have largely omitted an important data type, set-valued data, which frequently occur in contemporary applications such as information retrieval and recommender systems. The few existing estimators for such data either favor high-frequency elements or rely on a partial independence assumption, which limits their practical applicability. 
We propose ACE, an Attention-based Cardinality Estimator for estimating the cardinality of queries over set-valued data. We first design a distillation-based data encoder to condense the dataset into a compact matrix. We then design an attention-based query analyzer to capture correlations among query elements. To handle variable-sized queries, a pooling module is introduced, followed by a regression model (MLP) to generate final cardinality estimates. We evaluate ACE on three datasets with varying query element distributions, demonstrating that ACE outperforms the state-of-the-art competitors in terms of both accuracy and efficiency. </p> </div> </dd> <dt> <a name='item2'>[2]</a> <a href ="/abs/2503.14937" title="Abstract" id="2503.14937"> arXiv:2503.14937 </a> [<a href="/pdf/2503.14937" title="Download PDF" id="pdf-2503.14937" aria-labelledby="pdf-2503.14937">pdf</a>, <a href="/format/2503.14937" title="Other formats" id="oth-2503.14937" aria-labelledby="oth-2503.14937">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Proceedings of the 3rd Italian Conference on Big Data and Data Science (ITADATA2024) </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bena,+N">Nicola Bena</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Diamantini,+C">Claudia Diamantini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Natilli,+M">Michela Natilli</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Romano,+L">Luigi Romano</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Stilo,+G">Giovanni Stilo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pansanella,+V">Valentina Pansanella</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ardagna,+C+A">Claudio A. 
Ardagna</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Monreale,+A">Anna Monreale</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Trasarti,+R">Roberto Trasarti</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span>; Machine Learning (cs.LG) </div> <p class='mathjax'> Proceedings of the 3rd Italian Conference on Big Data and Data Science (ITADATA2024), held in Pisa, Italy, September 17-19, 2024. <br>The Italian Conference on Big Data and Data Science (ITADATA2024) is the annual event supported by the CINI Big Data National Laboratory and ISTI CNR that aims to put together Italian researchers and professionals from academia, industry, government, and public administration working in the field of big data and data science, as well as related fields (e.g., security and privacy, HPC, Cloud). <br>ITADATA2024 covered research on all theoretical and practical aspects of Big Data and data science including data governance, data processing, data analysis, data reporting, data protection, as well as experimental studies and lessons learned. 
In particular, ITADATA2024 focused on <br>- Data spaces <br>- Data processing life cycle <br>- Machine learning and Large Language Models <br>- Applications of big data and data science in healthcare, finance, industry 5.0, and beyond <br>- Data science for social network analysis </p> </div> </dd> </dl> <dl id='articles'> <h3>Cross submissions (showing 1 of 1 entries)</h3> <dt> <a name='item3'>[3]</a> <a href ="/abs/2503.15250" title="Abstract" id="2503.15250"> arXiv:2503.15250 </a> (cross-list from cs.LG) [<a href="/pdf/2503.15250" title="Download PDF" id="pdf-2503.15250" aria-labelledby="pdf-2503.15250">pdf</a>, <a href="https://arxiv.org/html/2503.15250v1" title="View HTML" id="html-2503.15250" aria-labelledby="html-2503.15250" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.15250" title="Other formats" id="oth-2503.15250" aria-labelledby="oth-2503.15250">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ImputeGAP: A Comprehensive Library for Time Series Imputation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Nater,+Q">Quentin Nater</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Khayati,+M">Mourad Khayati</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Pasquier,+J">Jacques Pasquier</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Databases (cs.DB) </div> <p class='mathjax'> With the prevalence of sensor failures, imputation--the process of estimating missing values--has emerged as the cornerstone of time series data preparation. While numerous imputation algorithms have been developed to address these data gaps, existing libraries provide limited support. 
Furthermore, they often lack the ability to simulate realistic patterns of time series missing data and fail to account for the impact of imputation on subsequent downstream analysis. <br>This paper introduces ImputeGAP, a comprehensive library for time series imputation that supports a diverse range of imputation methods and modular missing data simulation catering to datasets with varying characteristics. The library includes extensive customization options, such as automated hyperparameter tuning, benchmarking, explainability, downstream evaluation, and compatibility with popular time series frameworks. </p> </div> </dd> </dl> <dl id='articles'> <h3>Replacement submissions (showing 4 of 4 entries)</h3> <dt> <a name='item4'>[4]</a> <a href ="/abs/2210.11996" title="Abstract" id="2210.11996"> arXiv:2210.11996 </a> (replaced) [<a href="/pdf/2210.11996" title="Download PDF" id="pdf-2210.11996" aria-labelledby="pdf-2210.11996">pdf</a>, <a href="https://arxiv.org/html/2210.11996v4" title="View HTML" id="html-2210.11996" aria-labelledby="html-2210.11996" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2210.11996" title="Other formats" id="oth-2210.11996" aria-labelledby="oth-2210.11996">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Unbalanced Triangle Detection and Enumeration Hardness for Unions of Conjunctive Queries </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Bringmann,+K">Karl Bringmann</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Carmeli,+N">Nofar Carmeli</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span>; Data Structures and Algorithms (cs.DS) </div> <p class='mathjax'> We study the enumeration of answers to Unions of Conjunctive Queries (UCQs) with optimal time guarantees. 
More precisely, we wish to identify the queries that can be solved with linear preprocessing time and constant delay. Despite the basic nature of this problem, it was shown only recently that UCQs can be solved within these time bounds if they admit free-connex union extensions, even if all individual CQs in the union are intractable with respect to the same complexity measure. Our goal is to understand whether there exist additional tractable UCQs, not covered by the currently known algorithms. As a first step, we show that some previously unclassified UCQs are hard using the classic 3SUM hypothesis, via a known reduction from 3SUM to triangle listing in graphs. As a second step, we identify a question about a variant of this graph task that is unavoidable if we want to classify all self-join-free UCQs: is it possible to decide the existence of a triangle in a vertex-unbalanced tripartite graph in linear time? We prove that this task is equivalent in hardness to some family of UCQs. Finally, we show a dichotomy for unions of two self-join-free CQs if we assume the answer to this question is negative. In conclusion, this paper pinpoints a computational barrier in the form of a single decision problem that is key to advancing our understanding of the enumeration complexity of many UCQs. Without a breakthrough for unbalanced triangle detection, we have no hope of finding an efficient algorithm for additional unions of two self-join-free CQs. On the other hand, a sufficiently efficient unbalanced triangle detection algorithm can be turned into an efficient algorithm for a family of UCQs currently not known to be tractable. 
</p> </div> </dd> <dt> <a name='item5'>[5]</a> <a href ="/abs/2503.10036" title="Abstract" id="2503.10036"> arXiv:2503.10036 </a> (replaced) [<a href="/pdf/2503.10036" title="Download PDF" id="pdf-2503.10036" aria-labelledby="pdf-2503.10036">pdf</a>, <a href="https://arxiv.org/html/2503.10036v2" title="View HTML" id="html-2503.10036" aria-labelledby="html-2503.10036" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.10036" title="Other formats" id="oth-2503.10036" aria-labelledby="oth-2503.10036">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> CCaaLF: Concurrency Control as a Learnable Function </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Pan,+H">Hexiang Pan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Cai,+S">Shaofeng Cai</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+G">Gang Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Dinh,+T+T+A">Tien Tuan Anh Dinh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Wu,+Y">Yuncheng Wu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chee,+Y+M">Yeow Meng Chee</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ooi,+B+C">Beng Chin Ooi</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Databases (cs.DB)</span> </div> <p class='mathjax'> Concurrency control (CC) algorithms are important in modern transactional databases, as they enable high performance by executing transactions concurrently while ensuring correctness. However, state-of-the-art CC algorithms struggle to perform well across diverse workloads, and most do not consider workload drifts. 
<br>In this paper, we propose CCaaLF (Concurrency Control as a Learnable Function), a novel learned concurrency control algorithm designed to achieve high performance across varying workloads. The algorithm is quick to optimize, making it robust against dynamic workloads. CCaaLF learns an agent function that captures a large number of design choices from existing CC algorithms. The function is implemented as an efficient in-database lookup table that maps database states to concurrency control actions. The learning process is based on a combination of Bayesian optimization and a novel graph reduction algorithm, which converges quickly to a function that achieves high transaction throughput. We compare CCaaLF against five state-of-the-art CC algorithms and show that our algorithm consistently outperforms them in terms of transaction throughput and optimization time. </p> </div> </dd> <dt> <a name='item6'>[6]</a> <a href ="/abs/2404.11581" title="Abstract" id="2404.11581"> arXiv:2404.11581 </a> (replaced) [<a href="/pdf/2404.11581" title="Download PDF" id="pdf-2404.11581" aria-labelledby="pdf-2404.11581">pdf</a>, <a href="https://arxiv.org/html/2404.11581v3" title="View HTML" id="html-2404.11581" aria-labelledby="html-2404.11581" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2404.11581" title="Other formats" id="oth-2404.11581" aria-labelledby="oth-2404.11581">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> E2ETune: End-to-End Knob Tuning via Fine-tuned Generative Language Model </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Huang,+X">Xinmei Huang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+H">Haoyang Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+J">Jing Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhao,+X">Xinxin Zhao</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&query=Yao,+Z">Zhiming Yao</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+Y">Yiyan Li</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+T">Tieying Zhang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+J">Jianjun Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Chen,+H">Hong Chen</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Li,+C">Cuiping Li</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by VLDB 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Artificial Intelligence (cs.AI)</span>; Databases (cs.DB) </div> <p class='mathjax'> Database knob tuning is a significant challenge for database administrators, as it involves tuning a large number of configuration knobs with continuous or discrete values to achieve optimal database performance. Traditional methods, such as manual tuning or learning-based approaches, typically require numerous workload replays and are both time-consuming and resource-intensive. To address this challenge, we introduce E2ETune, an end-to-end knob tuner powered by a fine-tuned generative language model. The key idea is to leverage the exceptional sequence-to-sequence modeling capabilities of generative language models to capture the complex mapping between workloads (inputs) and their corresponding promising configurations (outputs). To achieve this goal, we propose a novel data generation framework to efficiently produce a large amount of training data, where each data sample consists of a workload and its promising configuration. Then, these data are used to fine-tune a generative language model, yielding an end-to-end knob tuner. This tuner offers out-of-the-box configuration recommendations for new workloads. 
We conduct extensive experiments to evaluate E2ETune's efficiency and effectiveness using 10 representative and 3 real-world benchmarks. Compared to state-of-the-art methods, E2ETune can identify competitive configurations in significantly less time. </p> </div> </dd> <dt> <a name='item7'>[7]</a> <a href ="/abs/2410.11843" title="Abstract" id="2410.11843"> arXiv:2410.11843 </a> (replaced) [<a href="/pdf/2410.11843" title="Download PDF" id="pdf-2410.11843" aria-labelledby="pdf-2410.11843">pdf</a>, <a href="https://arxiv.org/html/2410.11843v5" title="View HTML" id="html-2410.11843" aria-labelledby="html-2410.11843" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2410.11843" title="Other formats" id="oth-2410.11843" aria-labelledby="oth-2410.11843">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> From Commands to Prompts: LLM-based Semantic File System for AIOS </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&query=Shi,+Z">Zeru Shi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Mei,+K">Kai Mei</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Jin,+M">Mingyu Jin</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Su,+Y">Yongye Su</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zuo,+C">Chaoji Zuo</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Hua,+W">Wenyue Hua</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Xu,+W">Wujiang Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Ren,+Y">Yujie Ren</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Liu,+Z">Zirui Liu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Du,+M">Mengnan Du</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Deng,+D">Dong Deng</a>, <a href="https://arxiv.org/search/cs?searchtype=author&query=Zhang,+Y">Yongfeng 
Zhang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted by International Conference on Learning Representations 2025 (ICLR 2025) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Human-Computer Interaction (cs.HC)</span>; Artificial Intelligence (cs.AI); Databases (cs.DB); Machine Learning (cs.LG) </div> <p class='mathjax'> Large language models (LLMs) have demonstrated significant potential in the development of intelligent applications and systems such as LLM-based agents and agent operating systems (AIOS). However, when these applications and systems interact with the underlying file system, the file system still remains the traditional paradigm: reliant on manual navigation through precise commands. This paradigm poses a bottleneck to the usability of these systems as users are required to navigate complex folder hierarchies and remember cryptic file names. To address this limitation, we propose an LLM-based semantic file system (LSFS) for prompt-driven file management. Unlike conventional approaches, LSFS incorporates LLMs to enable users or agents to interact with files through natural language prompts, facilitating semantic file management. At the macro-level, we develop a comprehensive API set to achieve semantic file management functionalities, such as semantic file retrieval, file update monitoring and summarization, and semantic file rollback. At the micro-level, we store files by constructing semantic indexes for them, design and implement syscalls of different semantic operations (e.g., CRUD, group by, join) powered by a vector database. Our experiments show that LSFS offers significant improvements over traditional file systems in terms of user convenience, the diversity of supported functions, and the accuracy and efficiency of file operations. 
Additionally, with the integration of LLM, our system enables more intelligent file management tasks, such as content summarization and version comparison, further enhancing its capabilities. </p> </div> </dd> </dl> <div class='paging'>Total of 7 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/cs.DB/new?skip=0&amp;show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> </div> </div> </div> </main> <footer style="clear: both;"> <div class="columns is-desktop" role="navigation" aria-label="Secondary" style="margin: -0.75em -0.75em 0.75em -0.75em">  <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 
358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div>   <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" 
href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div>   </div> </footer> </div> <script src="/static/base/1.0.1/js/member_acknowledgement.js"></script> </body> </html>

CINXE.COM

Databases