CINXE.COM

Performance

<!DOCTYPE html> <html lang="en"> <head> <title>Performance </title> <meta name="viewport" content="width=device-width, initial-scale=1"> <link rel="apple-touch-icon" sizes="180x180" href="/static/browse/0.3.4/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="/static/browse/0.3.4/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="/static/browse/0.3.4/images/icons/favicon-16x16.png"> <link rel="manifest" href="/static/browse/0.3.4/images/icons/site.webmanifest"> <link rel="mask-icon" href="/static/browse/0.3.4/images/icons/safari-pinned-tab.svg" color="#5bbad5"> <meta name="msapplication-TileColor" content="#da532c"> <meta name="theme-color" content="#ffffff"> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/arXiv.css?v=20241206" /> <link rel="stylesheet" type="text/css" media="print" href="/static/browse/0.3.4/css/arXiv-print.css?v=20200611" /> <link rel="stylesheet" type="text/css" media="screen" href="/static/browse/0.3.4/css/browse_search.css" /> <script language="javascript" src="/static/browse/0.3.4/js/accordion.js"></script> <script src="/static/browse/0.3.4/js/mathjaxToggle.min.js" type="text/javascript"></script> <script type="text/javascript" language="javascript">mathjaxToggle();</script> </head> <body class="with-cu-identity"> <div class="flex-wrap-footer"> <header> <a href="#content" class="is-sr-only">Skip to main content</a> <!-- start desktop header --> <div class="columns is-vcentered is-hidden-mobile" id="cu-identity"> <div class="column" id="cu-logo"> <a href="https://www.cornell.edu/"><img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University" /></a> </div><div class="column" id="support-ack"> <span id="support-ack-url">We gratefully acknowledge support from the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.</span> <a 
href="https://info.arxiv.org/about/donate.html" class="btn-header-donate">Donate</a> </div> </div> <div id="header" class="is-hidden-mobile"> <a aria-hidden="true" tabindex="-1" href="/IgnoreMe"></a> <div class="header-breadcrumbs"> <a href="/"><img src="/static/browse/0.3.4/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" style="height:40px;"/></a> <span>&gt;</span> <a href="/list/cs.PF/recent">cs.PF</a> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div><!-- /end desktop header --> <div class="mobile-header"> <div class="columns is-mobile"> <div class="column logo-arxiv"><a href="https://arxiv.org/"><img 
src="/static/browse/0.3.4/images/arxiv-logomark-small-white.svg" alt="arXiv logo" style="height:60px;" /></a></div> <div class="column logo-cornell"><a href="https://www.cornell.edu/"> <picture> <source media="(min-width: 501px)" srcset="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg 400w" sizes="400w" /> <source srcset="/static/browse/0.3.4/images/icons/cu/cornell_seal_simple_black.svg 2x" /> <img src="/static/browse/0.3.4/images/icons/cu/cornell-reduced-white-SMALL.svg" alt="Cornell University Logo" /> </picture> </a></div> <div class="column nav" id="toggle-container" role="menubar"> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-white"><title>open search</title><path d="M505 442.7L405.3 343c-4.5-4.5-10.6-7-17-7H372c27.6-35.3 44-79.7 44-128C416 93.1 322.9 0 208 0S0 93.1 0 208s93.1 208 208 208c48.3 0 92.7-16.4 128-44v16.3c0 6.4 2.5 12.5 7 17l99.7 99.7c9.4 9.4 24.6 9.4 33.9 0l28.3-28.3c9.4-9.4 9.4-24.6.1-34zM208 336c-70.7 0-128-57.2-128-128 0-70.7 57.2-128 128-128 70.7 0 128 57.2 128 128 0 70.7-57.2 128-128 128z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <form class="mobile-search-form" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <input class="input" type="text" name="query" placeholder="Search..." 
aria-label="Search term or terms" /> <input type="hidden" name="source" value="header"> <input type="hidden" name="searchtype" value="all"> <button class="button">GO</button> </div> </form> </div> <button class="toggle-control"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-white" role="menu"><title>open navigation menu</title><path d="M16 132h416c8.837 0 16-7.163 16-16V76c0-8.837-7.163-16-16-16H16C7.163 60 0 67.163 0 76v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16zm0 160h416c8.837 0 16-7.163 16-16v-40c0-8.837-7.163-16-16-16H16c-8.837 0-16 7.163-16 16v40c0 8.837 7.163 16 16 16z"/></svg></button> <div class="mobile-toggle-block toggle-target"> <nav class="mobile-menu" aria-labelledby="mobilemenulabel"> <h2 id="mobilemenulabel">quick links</h2> <ul> <li><a href="https://arxiv.org/login">Login</a></li> <li><a href="https://info.arxiv.org/help">Help Pages</a></li> <li><a href="https://info.arxiv.org/about">About</a></li> </ul> </nav> </div> </div> </div> </div><!-- /end mobile-header --> </header> <main> <div id="content"> <div id='content-inner'> <div id='dlpage'> <h1>Performance</h1> <ul> <li><a href="#item1">New submissions</a></li> <li><a href="#item2">Cross-lists</a></li> <li><a href="#item4">Replacements</a></li> </ul> <p>See <a id="recent-cs.PF" aria-labelledby="recent-cs.PF" href="/list/cs.PF/recent">recent</a> articles</p> <h3>Showing new listings for Tuesday, 18 March 2025</h3> <div class='paging'>Total of 7 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href="/list/cs.PF/new?skip=0&amp;show=1000" rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> <dl id='articles'> <h3>New submissions (showing 1 of 1 entries)</h3> <dt> <a name='item1'>[1]</a> <a href ="/abs/2503.12185" title="Abstract" id="2503.12185"> arXiv:2503.12185 </a> [<a 
href="/pdf/2503.12185" title="Download PDF" id="pdf-2503.12185" aria-labelledby="pdf-2503.12185">pdf</a>, <a href="https://arxiv.org/html/2503.12185v1" title="View HTML" id="html-2503.12185" aria-labelledby="html-2503.12185" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.12185" title="Other formats" id="oth-2503.12185" aria-labelledby="oth-2503.12185">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> FAILS: A Framework for Automated Collection and Analysis of LLM Service Incidents </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Battaglini-Fischer,+S">Sándor Battaglini-Fischer</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Nishanthi">Nishanthi Srinivasan</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Szarvas,+B+L">Bálint László Szarvas</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Chu,+X">Xiaoyu Chu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Iosup,+A">Alexandru Iosup</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> HotCloudPerf 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Performance (cs.PF)</span>; Distributed, Parallel, and Cluster Computing (cs.DC) </div> <p class='mathjax'> Large Language Model (LLM) services such as ChatGPT, DALLE, and Cursor have quickly become essential for society, businesses, and individuals, empowering applications such as chatbots, image generation, and code assistance. The complexity of LLM systems makes them prone to failures and affects their reliability and availability, yet their failure patterns are not fully understood, making it an emerging problem. 
However, there are limited datasets and studies in this area, particularly lacking an open-access tool for analyzing LLM service failures based on incident reports. Addressing these problems, in this work we propose FAILS, the first open-sourced framework for incident reports collection and analysis on different LLM services and providers. FAILS provides comprehensive data collection, analysis, and visualization capabilities, including:(1) It can automatically collect, clean, and update incident data through its data scraper and processing components;(2) It provides 17 types of failure analysis, allowing users to explore temporal trends of incidents, analyze service reliability metrics, such as Mean Time to Recovery (MTTR) and Mean Time Between Failures (MTBF);(3) It leverages advanced LLM tools to assist in data analysis and interpretation, enabling users to gain observations and insights efficiently. All functions are integrated in the backend, allowing users to easily access them through a web-based frontend interface. FAILS supports researchers, engineers, and general users to understand failure patterns and further mitigate operational incidents and outages in LLM services. The framework is publicly available on <a href="https://github.com/atlarge-research/FAILS" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> </dl> <dl id='articles'> <h3>Cross submissions (showing 2 of 2 entries)</h3> <dt> <a name='item2'>[2]</a> <a href ="/abs/2503.12668" title="Abstract" id="2503.12668"> arXiv:2503.12668 </a> (cross-list from cs.LG) [<a href="/pdf/2503.12668" title="Download PDF" id="pdf-2503.12668" aria-labelledby="pdf-2503.12668">pdf</a>, <a href="https://arxiv.org/html/2503.12668v1" title="View HTML" id="html-2503.12668" aria-labelledby="html-2503.12668" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.12668" title="Other formats" id="oth-2503.12668" aria-labelledby="oth-2503.12668">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> ZO2: Scalable Zeroth-Order Fine-Tuning for Extremely Large Language Models with Limited GPU Memory </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wang,+L">Liangyu Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Ren,+J">Jie Ren</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Xu,+H">Hang Xu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wang,+J">Junxiao Wang</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Xie,+H">Huanyi Xie</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Keyes,+D+E">David E. Keyes</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Wang,+D">Di Wang</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 14 pages, 7 figures </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Performance (cs.PF) </div> <p class='mathjax'> Fine-tuning large pre-trained LLMs generally demands extensive GPU memory. 
Traditional first-order optimizers like SGD encounter substantial difficulties due to increased memory requirements from storing activations and gradients during both the forward and backward phases as the model size expands. Alternatively, zeroth-order (ZO) techniques can compute gradients using just forward operations, eliminating the need to store activations. Furthermore, by leveraging CPU capabilities, it&#39;s feasible to enhance both the memory and processing power available to a single GPU. We propose a novel framework, ZO2 (Zeroth-Order Offloading), for efficient zeroth-order fine-tuning of LLMs with only limited GPU memory. Our framework dynamically shifts model parameters between the CPU and GPU as required, optimizing computation flow and maximizing GPU usage by minimizing downtime. This integration of parameter adjustments with ZO&#39;s double forward operations reduces unnecessary data movement, enhancing the fine-tuning efficacy. Additionally, our framework supports an innovative low-bit precision approach in AMP mode to streamline data exchanges between the CPU and GPU. Employing this approach allows us to fine-tune extraordinarily large models, such as the OPT-175B with more than 175 billion parameters, on a mere 18GB GPU--achievements beyond the reach of traditional methods. Moreover, our framework achieves these results with almost no additional time overhead and absolutely no accuracy loss compared to standard zeroth-order methods. ZO2&#39;s code has been open-sourced in <a href="https://github.com/liangyuwang/zo2" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item3'>[3]</a> <a href ="/abs/2503.13064" title="Abstract" id="2503.13064"> arXiv:2503.13064 </a> (cross-list from cs.AR) [<a href="/pdf/2503.13064" title="Download PDF" id="pdf-2503.13064" aria-labelledby="pdf-2503.13064">pdf</a>, <a href="https://arxiv.org/html/2503.13064v1" title="View HTML" id="html-2503.13064" aria-labelledby="html-2503.13064" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2503.13064" title="Other formats" id="oth-2503.13064" aria-labelledby="oth-2503.13064">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> HERMES: High-Performance RISC-V Memory Hierarchy for ML Workloads </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Suryadevara,+P">Pranav Suryadevara</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> 5 pages, 5 figures. Individual Project </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Hardware Architecture (cs.AR)</span>; Performance (cs.PF) </div> <p class='mathjax'> The growth of machine learning (ML) workloads has underscored the importance of efficient memory hierarchies to address bandwidth, latency, and scalability challenges. HERMES focuses on optimizing memory subsystems for RISC-V architectures to meet the computational needs of ML models such as CNNs, RNNs, and Transformers. This project explores state-of-the-art techniques such as advanced prefetching, tensor-aware caching, and hybrid memory models. The cornerstone of HERMES is the integration of shared L3 caches with fine-grained coherence protocols and specialized pathways to deep learning accelerators like Gemmini. Simulation tools like Gem5 and DRAMSim2 are used to evaluate baseline performance and scalability under representative ML workloads. 
The findings of this study highlight the design choices and anticipated challenges, paving the way for low-latency scalable memory operations for ML applications. </p> </div> </dd> </dl> <dl id='articles'> <h3>Replacement submissions (showing 4 of 4 entries)</h3> <dt> <a name='item4'>[4]</a> <a href ="/abs/2501.12469" title="Abstract" id="2501.12469"> arXiv:2501.12469 </a> (replaced) [<a href="/pdf/2501.12469" title="Download PDF" id="pdf-2501.12469" aria-labelledby="pdf-2501.12469">pdf</a>, <a href="https://arxiv.org/html/2501.12469v2" title="View HTML" id="html-2501.12469" aria-labelledby="html-2501.12469" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2501.12469" title="Other formats" id="oth-2501.12469" aria-labelledby="oth-2501.12469">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> An Empirical Characterization of Outages and Incidents in Public Services for Large Language Models </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Chu,+X">Xiaoyu Chu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Talluri,+S">Sacheendra Talluri</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Lu,+Q">Qingxian Lu</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Iosup,+A">Alexandru Iosup</a></div> <div class='list-journal-ref'><span class='descriptor'>Journal-ref:</span> 16th ACM/SPEC International Conference on Performance Engineering (ICPE 2025) </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Performance (cs.PF)</span>; Distributed, Parallel, and Cluster Computing (cs.DC) </div> <p class='mathjax'> People and businesses increasingly rely on public LLM services, such as ChatGPT, DALLE, and Claude. Understanding their outages, and particularly measuring their failure-recovery processes, is becoming a stringent problem. 
However, only limited studies exist in this emerging area. Addressing this problem, in this work we conduct an empirical characterization of outages and failure-recovery in public LLM services. We collect and prepare datasets for 8 commonly used LLM services across 3 major LLM providers, including market-leads OpenAI and Anthropic. We conduct a detailed analysis of failure recovery statistical properties, temporal patterns, co-occurrence, and the impact range of outage-causing incidents. We make over 10 observations, among which: (1) Failures in OpenAI&#39;s ChatGPT take longer to resolve but occur less frequently than those in Anthropic&#39;s Claude;(2) OpenAI and Anthropic service failures exhibit strong weekly and monthly periodicity; and (3) OpenAI services offer better failure-isolation than Anthropic services. Our research explains LLM failure characteristics and thus enables optimization in building and using LLM systems. FAIR data and code are publicly available on <a href="https://zenodo.org/records/14018219" rel="external noopener nofollow" class="link-external link-https">this https URL</a> and <a href="https://github.com/atlarge-research/llm-service-analysis" rel="external noopener nofollow" class="link-external link-https">this https URL</a>. 
</p> </div> </dd> <dt> <a name='item5'>[5]</a> <a href ="/abs/2403.06348" title="Abstract" id="2403.06348"> arXiv:2403.06348 </a> (replaced) [<a href="/pdf/2403.06348" title="Download PDF" id="pdf-2403.06348" aria-labelledby="pdf-2403.06348">pdf</a>, <a href="https://arxiv.org/html/2403.06348v2" title="View HTML" id="html-2403.06348" aria-labelledby="html-2403.06348" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2403.06348" title="Other formats" id="oth-2403.06348" aria-labelledby="oth-2403.06348">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Accelerating Sparse Tensor Decomposition Using Adaptive Linearized Representation </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Laukemann,+J">Jan Laukemann</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Helal,+A+E">Ahmed E. Helal</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Anderson,+S+I+G">S. 
Isaac Geronimo Anderson</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Checconi,+F">Fabio Checconi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Soh,+Y">Yongseok Soh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Tithi,+J+J">Jesmin Jahan Tithi</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Ranadive,+T">Teresa Ranadive</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Gravelle,+B+J">Brian J Gravelle</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Petrini,+F">Fabrizio Petrini</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Choi,+J">Jee Choi</a></div> <div class='list-comments mathjax'><span class='descriptor'>Comments:</span> Accepted to TPDS 2025 </div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Distributed, Parallel, and Cluster Computing (cs.DC)</span>; Data Structures and Algorithms (cs.DS); Performance (cs.PF) </div> <p class='mathjax'> High-dimensional sparse data emerge in many critical application domains such as healthcare and cybersecurity. To extract meaningful insights from massive volumes of these multi-dimensional data, scientists employ unsupervised analysis tools based on tensor decomposition (TD) methods. However, real-world sparse tensors exhibit highly irregular shapes and data distributions, which pose significant challenges for making efficient use of modern parallel processors. This study breaks the prevailing assumption that compressing sparse tensors into coarse-grained structures or along a particular dimension/mode is more efficient than keeping them in a fine-grained, mode-agnostic form. Our novel sparse tensor representation, Adaptive Linearized Tensor Order (ALTO), encodes tensors in a compact format that can be easily streamed from memory and is amenable to both caching and parallel execution. 
In contrast to existing compressed tensor formats, ALTO constructs one tensor copy that is agnostic to both the mode orientation and the irregular distribution of nonzero elements. To demonstrate the efficacy of ALTO, we propose a set of parallel TD algorithms that exploit the inherent data reuse of tensor computations to substantially reduce synchronization overhead, decrease memory footprint, and improve parallel performance. Additionally, we characterize the major execution bottlenecks of TD methods on the latest Intel Xeon Scalable processors and introduce dynamic adaptation heuristics to automatically select the best algorithm based on the sparse tensor characteristics. Across a diverse set of real-world data sets, ALTO outperforms the state-of-the-art approaches, achieving more than an order-of-magnitude speedup over the best mode-agnostic formats. Compared to the best mode-specific formats, ALTO achieves 5.1X geometric mean speedup at a fraction (25%) of their storage costs. </p> </div> </dd> <dt> <a name='item6'>[6]</a> <a href ="/abs/2409.05217" title="Abstract" id="2409.05217"> arXiv:2409.05217 </a> (replaced) [<a href="/pdf/2409.05217" title="Download PDF" id="pdf-2409.05217" aria-labelledby="pdf-2409.05217">pdf</a>, <a href="https://arxiv.org/html/2409.05217v3" title="View HTML" id="html-2409.05217" aria-labelledby="html-2409.05217" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2409.05217" title="Other formats" id="oth-2409.05217" aria-labelledby="oth-2409.05217">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> From Concept to Reality: 5G Positioning with Open-Source Implementation of UL-TDoA in OpenAirInterface </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Malik,+A">Adeel Malik</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Ahadi,+M">Mohsen Ahadi</a>, <a 
href="https://arxiv.org/search/cs?searchtype=author&amp;query=Kaltenberger,+F">Florian Kaltenberger</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Warnke,+K">Klaus Warnke</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Thinh,+N+T">Nguyen Tien Thinh</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Bouknana,+N">Nada Bouknana</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Thienot,+C">Cedric Thienot</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Onche,+G">Godswill Onche</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Arora,+S">Sagar Arora</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Information Theory (cs.IT)</span>; Emerging Technologies (cs.ET); Performance (cs.PF) </div> <p class='mathjax'> This paper presents, for the first time, an open-source implementation of the 3GPP Uplink Time Difference of Arrival (UL-TDoA) positioning method using the OpenAirInterface (OAI) framework. UL-TDoA is a critical positioning technique in 5G networks, leveraging the time differences of signal arrival at multiple base stations to determine the precise location of User Equipment (UE). This implementation aims to democratize access to advanced positioning technology by integrating UL-TDoA capabilities into both the Radio Access Network (RAN) and Core Network (CN) components of OAI, providing a comprehensive and 3GPP-compliant solution. The development includes the incorporation of essential protocol procedures, message flows, and interfaces as defined by 3GPP standards. Validation is conducted using two distinct methods: an OAI-RF simulator-based setup for controlled testing and an O-RAN-based Localization Testbed at EURECOM in real-world conditions. The results demonstrate the viability of this open-source UL-TDoA implementation, enabling precise positioning in various environments. 
By making this implementation publicly available, the study paves the way for widespread research, development, and innovation in the field of 5G positioning technologies, fostering collaboration and accelerating the advancement of cellular network positioning. </p> </div> </dd> <dt> <a name='item7'>[7]</a> <a href ="/abs/2502.16627" title="Abstract" id="2502.16627"> arXiv:2502.16627 </a> (replaced) [<a href="/pdf/2502.16627" title="Download PDF" id="pdf-2502.16627" aria-labelledby="pdf-2502.16627">pdf</a>, <a href="https://arxiv.org/html/2502.16627v3" title="View HTML" id="html-2502.16627" aria-labelledby="html-2502.16627" rel="noopener noreferrer" target="_blank">html</a>, <a href="/format/2502.16627" title="Other formats" id="oth-2502.16627" aria-labelledby="oth-2502.16627">other</a>] </dt> <dd> <div class='meta'> <div class='list-title mathjax'><span class='descriptor'>Title:</span> Energy-Efficient Transformer Inference: Optimization Strategies for Time Series Classification </div> <div class='list-authors'><a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Kermani,+A">Arshia Kermani</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Zeraatkar,+E">Ehsan Zeraatkar</a>, <a href="https://arxiv.org/search/cs?searchtype=author&amp;query=Irani,+H">Habib Irani</a></div> <div class='list-subjects'><span class='descriptor'>Subjects:</span> <span class="primary-subject">Machine Learning (cs.LG)</span>; Artificial Intelligence (cs.AI); Performance (cs.PF) </div> <p class='mathjax'> The increasing computational demands of transformer models in time series classification necessitate effective optimization strategies for energy-efficient deployment. Our study presents a systematic investigation of optimization techniques, focusing on structured pruning and quantization methods for transformer architectures. 
Through extensive experimentation on three distinct datasets (RefrigerationDevices, ElectricDevices, and PLAID), we quantitatively evaluate model performance and energy efficiency across different transformer configurations. Our experimental results demonstrate that static quantization reduces energy consumption by 29.14% while maintaining classification performance, and L1 pruning achieves a 63% improvement in inference speed with minimal accuracy degradation. Our findings provide valuable insights into the effectiveness of optimization strategies for transformer-based time series classification, establishing a foundation for efficient model deployment in resource-constrained environments. </p> </div> </dd> </dl> <div class='paging'>Total of 7 entries </div> <div class='morefewer'>Showing up to 2000 entries per page: <a href=/list/cs.PF/new?skip=0&amp;show=1000 rel="nofollow"> fewer</a> | <span style="color: #454545">more</span> | <span style="color: #454545">all</span> </div> </div> </div> </div> </main> <footer style="clear: both;"> <div class="columns is-desktop" role="navigation" aria-label="Secondary" style="margin: -0.75em -0.75em 0.75em -0.75em"> <!-- Macro-Column 1 --> <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 
142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- End Macro-Column 1 --> <!-- Macro-Column 2 --> <div class="column" style="padding: 0;"> <div class="columns"> <div class="column"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul style="list-style: none; line-height: 2;"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon 
filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> <!-- End Macro-Column 2 --> </div> </footer> </div> <script src="/static/base/1.0.1/js/member_acknowledgement.js"></script> </body> </html>

Pages: 1 2 3 4 5 6 7 8 9 10