CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–29 of 29 results for author: <span class="mathjax">Gopalakrishnan, G</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Gopalakrishnan%2C+G">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Gopalakrishnan, G"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Gopalakrishnan%2C+G&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Gopalakrishnan, G"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.08946">arXiv:2503.08946</a> <span> [<a href="https://arxiv.org/pdf/2503.08946">pdf</a>, <a href="https://arxiv.org/format/2503.08946">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> SIMT/GPU Data Race Verification using ISCC and Intermediary Code Representations: A Case Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Osterhout%2C+A">Andrew Osterhout</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.08946v1-abstract-short" style="display: inline;"> It is often difficult to write code that you can ensure will be executed in the right order when programing for parallel compute tasks. Due to the way that today's parallel compute hardware, primarily Graphical Processing Units (GPUs), allows you to write code. It is easy to write code that may result in one thread reading or modifying data before it should, thus resulting in a data race. It would… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.08946v1-abstract-full').style.display = 'inline'; document.getElementById('2503.08946v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.08946v1-abstract-full" style="display: none;"> It is often difficult to write code that you can ensure will be executed in the right order when programing for parallel compute tasks. Due to the way that today's parallel compute hardware, primarily Graphical Processing Units (GPUs), allows you to write code. It is easy to write code that may result in one thread reading or modifying data before it should, thus resulting in a data race. It would be useful to have a tool that could verify that the code will execute as expected. However, most static analysis done at the language level has to be completely retooled to work on a different languages. Therefore, it would be of great use to be able to perform verification and analysis on the Memory Model of a parallel compute code, in a lower level intermediary representations that most languages pass through on their way to something that the GPU hardware can understand. This body of work aims to deal with the question of if there is still enough of the information in the intermediary representations to be able to perform memory model verification to check for data races. To determine this we plan to analyze as a case study the GeSpMM Sparse Matrix Multiplication Algorithm, implemented in CUDA C++ with the LLVM compiler and Julia with CUDA.jl. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.08946v1-abstract-full').style.display = 'none'; document.getElementById('2503.08946v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Formal Verification of Parallel Algorithms, Static Analysis, Intermediary Code Representations, CUDA, GPU</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.05924">arXiv:2503.05924</a> <span> [<a href="https://arxiv.org/pdf/2503.05924">pdf</a>, <a href="https://arxiv.org/format/2503.05924">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Symbolic Computation">cs.SC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> Satire: Computing Rigorous Bounds for Floating-Point Rounding Error in Mixed-Precision Loop-Free Programs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Tirpankar%2C+T">Tanmay Tirpankar</a>, <a href="/search/cs?searchtype=author&query=Das%2C+A">Arnab Das</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.05924v1-abstract-short" style="display: inline;"> Techniques that rigorously bound the overall rounding error exhibited by a numerical program are of significant interest for communities developing numerical software. However, there are few available tools today that can be used to rigorously bound errors in programs that employ conditional statements (a basic need) as well as mixed-precision arithmetic (a direction of significant future interest… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.05924v1-abstract-full').style.display = 'inline'; document.getElementById('2503.05924v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.05924v1-abstract-full" style="display: none;"> Techniques that rigorously bound the overall rounding error exhibited by a numerical program are of significant interest for communities developing numerical software. However, there are few available tools today that can be used to rigorously bound errors in programs that employ conditional statements (a basic need) as well as mixed-precision arithmetic (a direction of significant future interest) employing global optimization in error analysis. In this paper, we present a new tool that fills this void while also employing an abstraction-guided optimization approach to allow designers to trade error-bound tightness for gains in analysis time -- useful when searching for design alternatives. We first present the basic rigorous analysis framework of Satire and then show how to extend it to incorporate abstractions, conditionals, and mixed-precision arithmetic. We begin by describing Satire's design and its performance on a collection of benchmark examples. We then describe these aspects of Satire: (1) how the error-bound and tool execution time vary with the abstraction level; (2) the additional machinery to handle conditional expression branches, including defining the concepts of instability jumps and instability window widths and measuring these quantities; and (3) how the error changes when a mix of precision values are used. To showcase how \satire can add value during design, we start with a Conjugate Gradient solver and demonstrate how its step size and search direction are affected by different precision settings. Satire is freely available for evaluation, and can be used during the design of numerical routines to effect design tradeoffs guided by rigorous empirical error guarantees. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.05924v1-abstract-full').style.display = 'none'; document.getElementById('2503.05924v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pgs, 8 figures, 4 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2502.15999">arXiv:2502.15999</a> <span> [<a href="https://arxiv.org/pdf/2502.15999">pdf</a>, <a href="https://arxiv.org/ps/2502.15999">ps</a>, <a href="https://arxiv.org/format/2502.15999">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> An SMT Formalization of Mixed-Precision Matrix Multiplication: Modeling Three Generations of Tensor Cores </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Valpey%2C+B">Benjamin Valpey</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinyi Li</a>, <a href="/search/cs?searchtype=author&query=Pai%2C+S">Sreepathi Pai</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2502.15999v1-abstract-short" style="display: inline;"> Many recent computational accelerators provide non-standard (e.g., reduced precision) arithmetic operations to enhance performance for floating-point matrix multiplication. Unfortunately, the properties of these accelerators are not widely understood and lack sufficient descriptions of their behavior. This makes it difficult for tool builders beyond the original vendor to target or simulate the ha… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15999v1-abstract-full').style.display = 'inline'; document.getElementById('2502.15999v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2502.15999v1-abstract-full" style="display: none;"> Many recent computational accelerators provide non-standard (e.g., reduced precision) arithmetic operations to enhance performance for floating-point matrix multiplication. Unfortunately, the properties of these accelerators are not widely understood and lack sufficient descriptions of their behavior. This makes it difficult for tool builders beyond the original vendor to target or simulate the hardware correctly, or for algorithm designers to be confident in their code. To address these gaps, prior studies have probed the behavior of these units with manually crafted tests. Such tests are cumbersome to design, and adapting them as the accelerators evolve requires repeated manual effort. We present a formal model for the tensor cores of Nvidia's Volta, Turing, and Ampere GPUs. We identify specific properties -- rounding mode, precision, and accumulation order -- that drive these cores' behavior. We formalize these properties and then use the formalization to automatically generate discriminating inputs that illustrate differences among machines. Our results confirm many of the findings of previous tensor core studies, but also identify subtle disagreements. In particular, Nvidia's machines do not, as previously reported, use round-to-zero for accumulation, and their 5-term accumulator requires 3 extra carry-out bits for full accuracy. Using our formal model, we analyze two existing algorithms that use half-precision tensor cores to accelerate single-precision multiplication with error correction. Our analysis reveals that the newer algorithm, designed to be more accurate than the first, is actually less accurate for certain inputs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2502.15999v1-abstract-full').style.display = 'none'; document.getElementById('2502.15999v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 February, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2025. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear in the 17th NASA Formal Methods Symposium (June 11-13, 2025)</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> B.2.4; G.1.0; F.2.1 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.11971">arXiv:2408.11971</a> <span> [<a href="https://arxiv.org/pdf/2408.11971">pdf</a>, <a href="https://arxiv.org/format/2408.11971">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> HoSZp: An Efficient Homomorphic Error-bounded Lossy Compressor for Scientific Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Agarwal%2C+T">Tripti Agarwal</a>, <a href="/search/cs?searchtype=author&query=Di%2C+S">Sheng Di</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+J">Jiajun Huang</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Y">Yafan Huang</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Underwood%2C+R">Robert Underwood</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+K">Kai Zhao</a>, <a href="/search/cs?searchtype=author&query=Liang%2C+X">Xin Liang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+G">Guanpeng Li</a>, <a href="/search/cs?searchtype=author&query=Cappello%2C+F">Franck Cappello</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.11971v1-abstract-short" style="display: inline;"> Error-bounded lossy compression has been a critical technique to significantly reduce the sheer amounts of simulation datasets for high-performance computing (HPC) scientific applications while effectively controlling the data distortion based on user-specified error bound. In many real-world use cases, users must perform computational operations on the compressed data (a.k.a. homomorphic compress… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11971v1-abstract-full').style.display = 'inline'; document.getElementById('2408.11971v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.11971v1-abstract-full" style="display: none;"> Error-bounded lossy compression has been a critical technique to significantly reduce the sheer amounts of simulation datasets for high-performance computing (HPC) scientific applications while effectively controlling the data distortion based on user-specified error bound. In many real-world use cases, users must perform computational operations on the compressed data (a.k.a. homomorphic compression). However, none of the existing error-bounded lossy compressors support the homomorphism, inevitably resulting in undesired decompression costs. In this paper, we propose a novel homomorphic error-bounded lossy compressor (called HoSZp), which supports not only error-bounding features but efficient computations (including negation, addition, multiplication, mean, variance, etc.) on the compressed data without the complete decompression step, which is the first attempt to the best of our knowledge. We develop several optimization strategies to maximize the overall compression ratio and execution performance. We evaluate HoSZp compared to other state-of-the-art lossy compressors based on multiple real-world scientific application datasets. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.11971v1-abstract-full').style.display = 'none'; document.getElementById('2408.11971v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 7 figures, 9 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2406.11209">arXiv:2406.11209</a> <span> [<a href="https://arxiv.org/pdf/2406.11209">pdf</a>, <a href="https://arxiv.org/format/2406.11209">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3624062.3625122">10.1145/3624062.3625122 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> What Operations can be Performed Directly on Compressed Arrays, and with What Error? </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Agarwal%2C+T">Tripti Agarwal</a>, <a href="/search/cs?searchtype=author&query=Dam%2C+H">Harvey Dam</a>, <a href="/search/cs?searchtype=author&query=Khalifa%2C+D+B">Dorra Ben Khalifa</a>, <a href="/search/cs?searchtype=author&query=Martel%2C+M">Matthieu Martel</a>, <a href="/search/cs?searchtype=author&query=Sadayappan%2C+P">P. Sadayappan</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2406.11209v1-abstract-short" style="display: inline;"> In response to the rapidly escalating costs of computing with large matrices and tensors caused by data movement, several lossy compression methods have been developed to significantly reduce data volumes. Unfortunately, all these methods require the data to be decompressed before further computations are done. In this work, we develop a lossy compressor that allows a dozen fairly fundamental oper… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11209v1-abstract-full').style.display = 'inline'; document.getElementById('2406.11209v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2406.11209v1-abstract-full" style="display: none;"> In response to the rapidly escalating costs of computing with large matrices and tensors caused by data movement, several lossy compression methods have been developed to significantly reduce data volumes. Unfortunately, all these methods require the data to be decompressed before further computations are done. In this work, we develop a lossy compressor that allows a dozen fairly fundamental operations directly on compressed data while offering good compression ratios and modest errors. We implement a new compressor PyBlaz based on the familiar GPU-powered PyTorch framework, and evaluate it on three non-trivial applications, choosing different number systems for internal representation. Our results demonstrate that the compressed-domain operations achieve good scalability with problem sizes while incurring errors well within acceptable limits. To our best knowledge, this is the first such lossy compressor that supports compressed-domain operations while achieving acceptable performance as well as error. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2406.11209v1-abstract-full').style.display = 'none'; document.getElementById('2406.11209v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 June, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">An extended but earlier version of paper in https://dl.acm.org/doi/10.1145/3624062.3625122 published at the DRBSD Workshop in 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.15632">arXiv:2403.15632</a> <span> [<a href="https://arxiv.org/pdf/2403.15632">pdf</a>, <a href="https://arxiv.org/format/2403.15632">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mathematical Software">cs.MS</span> </div> </div> <p class="title is-5 mathjax"> FlowFPX: Nimble Tools for Debugging Floating-Point Exceptions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Allred%2C+T">Taylor Allred</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinyi Li</a>, <a href="/search/cs?searchtype=author&query=Wiersdorf%2C+A">Ashton Wiersdorf</a>, <a href="/search/cs?searchtype=author&query=Greenman%2C+B">Ben Greenman</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.15632v1-abstract-short" style="display: inline;"> Reliable numerical computations are central to scientific computing, but the floating-point arithmetic that enables large-scale models is error-prone. Numeric exceptions are a common occurrence and can propagate through code, leading to flawed results. This paper presents FlowFPX, a toolkit for systematically debugging floating-point exceptions by recording their flow, coalescing exception context… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15632v1-abstract-full').style.display = 'inline'; document.getElementById('2403.15632v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.15632v1-abstract-full" style="display: none;"> Reliable numerical computations are central to scientific computing, but the floating-point arithmetic that enables large-scale models is error-prone. Numeric exceptions are a common occurrence and can propagate through code, leading to flawed results. This paper presents FlowFPX, a toolkit for systematically debugging floating-point exceptions by recording their flow, coalescing exception contexts, and fuzzing in select locations. These tools help scientists discover when exceptions happen and track down their origin, smoothing the way to a reliable codebase. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15632v1-abstract-full').style.display = 'none'; document.getElementById('2403.15632v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Presented at JuliaCon 2023; to appear in JuliaCon proceedings</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.00232">arXiv:2403.00232</a> <span> [<a href="https://arxiv.org/pdf/2403.00232">pdf</a>, <a href="https://arxiv.org/format/2403.00232">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> FTTN: Feature-Targeted Testing for Numerical Properties of NVIDIA & AMD Matrix Accelerators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinyi Li</a>, <a href="/search/cs?searchtype=author&query=Li%2C+A">Ang Li</a>, <a href="/search/cs?searchtype=author&query=Fang%2C+B">Bo Fang</a>, <a href="/search/cs?searchtype=author&query=Swirydowicz%2C+K">Katarzyna Swirydowicz</a>, <a href="/search/cs?searchtype=author&query=Laguna%2C+I">Ignacio Laguna</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.00232v1-abstract-short" style="display: inline;"> NVIDIA Tensor Cores and AMD Matrix Cores (together called Matrix Accelerators) are of growing interest in high-performance computing and machine learning owing to their high performance. Unfortunately, their numerical behaviors are not publicly documented, including the number of extra precision bits maintained, the accumulation order of addition, and predictable subnormal number handling during c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00232v1-abstract-full').style.display = 'inline'; document.getElementById('2403.00232v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.00232v1-abstract-full" style="display: none;"> NVIDIA Tensor Cores and AMD Matrix Cores (together called Matrix Accelerators) are of growing interest in high-performance computing and machine learning owing to their high performance. Unfortunately, their numerical behaviors are not publicly documented, including the number of extra precision bits maintained, the accumulation order of addition, and predictable subnormal number handling during computations. This makes it impossible to reliably port codes across these differing accelerators. This paper contributes a collection of {\em Feature Targeted Tests for Numerical Properties} that that help determine these features across five floating-point formats, four rounding modes and additional that highlight the rounding behaviors and preservation of extra precision bits. To show the practical relevance of FTTN, we design a simple matrix-multiplication test designed with insights gathered from our feature-tests. We executed this very simple test on five platforms, producing different answers: V100, A100, and MI250X produced 0, MI100 produced 255.875, and Hopper H100 produced 191.875. Our matrix multiplication tests employ patterns found in iterative refinement-based algorithms, highlighting the need to check for significant result variability when porting code across GPUs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.00232v1-abstract-full').style.display = 'none'; document.getElementById('2403.00232v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.02441">arXiv:2402.02441</a> <span> [<a href="https://arxiv.org/pdf/2402.02441">pdf</a>, <a href="https://arxiv.org/format/2402.02441">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mathematical Software">cs.MS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation">stat.CO</span> </div> </div> <p class="title is-5 mathjax"> TopoX: A Suite of Python Packages for Machine Learning on Topological Domains </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hajij%2C+M">Mustafa Hajij</a>, <a href="/search/cs?searchtype=author&query=Papillon%2C+M">Mathilde Papillon</a>, <a href="/search/cs?searchtype=author&query=Frantzen%2C+F">Florian Frantzen</a>, <a href="/search/cs?searchtype=author&query=Agerberg%2C+J">Jens Agerberg</a>, <a href="/search/cs?searchtype=author&query=AlJabea%2C+I">Ibrahem AlJabea</a>, <a href="/search/cs?searchtype=author&query=Ballester%2C+R">Rub茅n Ballester</a>, <a href="/search/cs?searchtype=author&query=Battiloro%2C+C">Claudio Battiloro</a>, <a href="/search/cs?searchtype=author&query=Bern%C3%A1rdez%2C+G">Guillermo Bern谩rdez</a>, <a href="/search/cs?searchtype=author&query=Birdal%2C+T">Tolga Birdal</a>, <a href="/search/cs?searchtype=author&query=Brent%2C+A">Aiden Brent</a>, <a href="/search/cs?searchtype=author&query=Chin%2C+P">Peter Chin</a>, <a href="/search/cs?searchtype=author&query=Escalera%2C+S">Sergio Escalera</a>, <a href="/search/cs?searchtype=author&query=Fiorellino%2C+S">Simone Fiorellino</a>, <a href="/search/cs?searchtype=author&query=Gardaa%2C+O+H">Odin Hoff Gardaa</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Gurusankar Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Govil%2C+D">Devendra Govil</a>, <a href="/search/cs?searchtype=author&query=Hoppe%2C+J">Josef Hoppe</a>, <a href="/search/cs?searchtype=author&query=Karri%2C+M+R">Maneel Reddy Karri</a>, <a href="/search/cs?searchtype=author&query=Khouja%2C+J">Jude Khouja</a>, <a href="/search/cs?searchtype=author&query=Lecha%2C+M">Manuel Lecha</a>, <a href="/search/cs?searchtype=author&query=Livesay%2C+N">Neal Livesay</a>, <a href="/search/cs?searchtype=author&query=Mei%C3%9Fner%2C+J">Jan Mei脽ner</a>, <a href="/search/cs?searchtype=author&query=Mukherjee%2C+S">Soham Mukherjee</a>, <a href="/search/cs?searchtype=author&query=Nikitin%2C+A">Alexander Nikitin</a>, <a href="/search/cs?searchtype=author&query=Papamarkou%2C+T">Theodore Papamarkou</a> , et al. (18 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2402.02441v5-abstract-short" style="display: inline;"> We introduce TopoX, a Python software suite that provides reliable and user-friendly building blocks for computing and machine learning on topological domains that extend graphs: hypergraphs, simplicial, cellular, path and combinatorial complexes. TopoX consists of three packages: TopoNetX facilitates constructing and computing on these domains, including working with nodes, edges and higher-order… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02441v5-abstract-full').style.display = 'inline'; document.getElementById('2402.02441v5-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.02441v5-abstract-full" style="display: none;"> We introduce TopoX, a Python software suite that provides reliable and user-friendly building blocks for computing and machine learning on topological domains that extend graphs: hypergraphs, simplicial, cellular, path and combinatorial complexes. TopoX consists of three packages: TopoNetX facilitates constructing and computing on these domains, including working with nodes, edges and higher-order cells; TopoEmbedX provides methods to embed topological domains into vector spaces, akin to popular graph-based embedding algorithms such as node2vec; TopoModelX is built on top of PyTorch and offers a comprehensive toolbox of higher-order message passing functions for neural networks on topological domains. The extensively documented and unit-tested source code of TopoX is available under MIT license at https://pyt-team.github.io/}{https://pyt-team.github.io/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.02441v5-abstract-full').style.display = 'none'; document.getElementById('2402.02441v5-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.17184">arXiv:2401.17184</a> <span> [<a href="https://arxiv.org/pdf/2401.17184">pdf</a>, <a href="https://arxiv.org/format/2401.17184">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Mathematical Software">cs.MS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> Rigorous Error Analysis for Logarithmic Number Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nguyen%2C+T+S">Thanh Son Nguyen</a>, <a href="/search/cs?searchtype=author&query=Solovyev%2C+A">Alexey Solovyev</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.17184v1-abstract-short" style="display: inline;"> Logarithmic Number Systems (LNS) hold considerable promise in helping reduce the number of bits needed to represent a high dynamic range of real-numbers with finite precision, and also efficiently support multiplication and division. However, under LNS, addition and subtraction turn into non-linear functions that must be approximated - typically using precomputed table-based functions. Additionall… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.17184v1-abstract-full').style.display = 'inline'; document.getElementById('2401.17184v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.17184v1-abstract-full" style="display: none;"> Logarithmic Number Systems (LNS) hold considerable promise in helping reduce the number of bits needed to represent a high dynamic range of real-numbers with finite precision, and also efficiently support multiplication and division. However, under LNS, addition and subtraction turn into non-linear functions that must be approximated - typically using precomputed table-based functions. Additionally, multiple layers of error correction are typically needed to improve result accuracy. Unfortunately, previous efforts have not characterized the resulting error bound. We provide the first rigorous analysis of LNS, covering detailed techniques such as co-transformation that are crucial to implementing subtraction with reasonable accuracy. We provide theorems capturing the error due to table interpolations, the finite precision of pre-computed values in the tables, and the error introduced by fix-point multiplications involved in LNS implementations. We empirically validate our analysis using a Python implementation, showing that our analytical bounds are tight, and that our testing campaign generates inputs diverse-enough to almost match (but not exceed) the analytical bounds. We close with discussions on how to adapt our analysis to LNS systems with different bases and also discuss many pragmatic ramifications of our work in the broader arena of scientific computing and machine learning. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.17184v1-abstract-full').style.display = 'none'; document.getElementById('2401.17184v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">42 pages, 14 figures, 6 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 65G50 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> G.1 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.08586">arXiv:2401.08586</a> <span> [<a href="https://arxiv.org/pdf/2401.08586">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> A GPU accelerated mixed-precision Smoothed Particle Hydrodynamics framework with cell-based relative coordinates </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mao%2C+Z">Zirui Mao</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinyi Li</a>, <a href="/search/cs?searchtype=author&query=Hu%2C+S">Shenyang Hu</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Li%2C+A">Ang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.08586v2-abstract-short" style="display: inline;"> Smoothed Particle Hydrodynamics (SPH) is essential for modeling complex large-deformation problems across various applications, requiring significant computational power. A major portion of SPH computation time is dedicated to the Nearest Neighboring Particle Search (NNPS) process. While advanced NNPS algorithms have been developed to enhance SPH efficiency, the potential efficiency gains from mod… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08586v2-abstract-full').style.display = 'inline'; document.getElementById('2401.08586v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.08586v2-abstract-full" style="display: none;"> Smoothed Particle Hydrodynamics (SPH) is essential for modeling complex large-deformation problems across various applications, requiring significant computational power. A major portion of SPH computation time is dedicated to the Nearest Neighboring Particle Search (NNPS) process. While advanced NNPS algorithms have been developed to enhance SPH efficiency, the potential efficiency gains from modern computation hardware remain underexplored. This study investigates the impact of GPU parallel architecture, low-precision computing on GPUs, and GPU memory management on NNPS efficiency. Our approach employs a GPU-accelerated mixed-precision SPH framework, utilizing low-precision float-point 16 (FP16) for NNPS while maintaining high precision for other components. To ensure FP16 accuracy in NNPS, we introduce a Relative Coordinated-based Link List (RCLL) algorithm, storing FP16 relative coordinates of particles within background cells. Our testing results show three significant speedup rounds for CPU-based NNPS algorithms. The first comes from parallel GPU computations, with up to a 1000x efficiency gain. The second is achieved through low-precision GPU computing, where the proposed FP16-based RCLL algorithm offers a 1.5x efficiency improvement over the FP64-based approach on GPUs. By optimizing GPU memory bandwidth utilization, the efficiency of the FP16 RCLL algorithm can be further boosted by 2.7x, as demonstrated in an example with 1 million particles. Our code is released at https://github.com/pnnl/lpNNPS4SPH. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.08586v2-abstract-full').style.display = 'none'; document.getElementById('2401.08586v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.04701">arXiv:2401.04701</a> <span> [<a href="https://arxiv.org/pdf/2401.04701">pdf</a>, <a href="https://arxiv.org/format/2401.04701">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> HiRace: Accurate and Fast Source-Level Race Checking of GPU Programs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Jacobson%2C+J">John Jacobson</a>, <a href="/search/cs?searchtype=author&query=Burtscher%2C+M">Martin Burtscher</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.04701v1-abstract-short" style="display: inline;"> Data races are egregious parallel programming bugs on CPUs. They are even worse on GPUs due to the hierarchical thread and memory structure, which makes it possible to write code that is correctly synchronized within a thread group while not being correct across groups. Thus far, all major data-race checkers for GPUs suffer from at least one of the following problems: they do not check races in gl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04701v1-abstract-full').style.display = 'inline'; document.getElementById('2401.04701v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.04701v1-abstract-full" style="display: none;"> Data races are egregious parallel programming bugs on CPUs. They are even worse on GPUs due to the hierarchical thread and memory structure, which makes it possible to write code that is correctly synchronized within a thread group while not being correct across groups. Thus far, all major data-race checkers for GPUs suffer from at least one of the following problems: they do not check races in global memory, do not work on recent GPUs, scale poorly, have not been extensively tested, miss simple data races, or are not dependable without detailed knowledge of the compiler. Our new data-race detection tool, HiRace, overcomes these limitations. Its key novelty is an innovative parallel finite-state machine that condenses an arbitrarily long access history into a constant-length state, thus allowing it to handle large and long-running programs. HiRace is a dynamic tool that checks for thread-group shared memory and global device memory races. It utilizes source-code instrumentation, thus avoiding driver, compiler, and hardware dependencies. We evaluate it on a modern calibrated data-race benchmark suite. On the 580 tested CUDA kernels, 346 of which contain data races, HiRace finds races missed by other tools without false alarms and is more than 10 times faster on average than the current state of the art, while incurring only half the memory overhead. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.04701v1-abstract-full').style.display = 'none'; document.getElementById('2401.04701v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2312.15640">arXiv:2312.15640</a> <span> [<a href="https://arxiv.org/pdf/2312.15640">pdf</a>, <a href="https://arxiv.org/format/2312.15640">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Report of the DOE/NSF Workshop on Correctness in Scientific Computing, June 2023, Orlando, FL </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gokhale%2C+M">Maya Gokhale</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Mayo%2C+J">Jackson Mayo</a>, <a href="/search/cs?searchtype=author&query=Nagarakatte%2C+S">Santosh Nagarakatte</a>, <a href="/search/cs?searchtype=author&query=Rubio-Gonz%C3%A1lez%2C+C">Cindy Rubio-Gonz谩lez</a>, <a href="/search/cs?searchtype=author&query=Siegel%2C+S+F">Stephen F. Siegel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2312.15640v2-abstract-short" style="display: inline;"> This report is a digest of the DOE/NSF Workshop on Correctness in Scientific Computing (CSC'23) held on June 17, 2023, as part of the Federated Computing Research Conference (FCRC) 2023. CSC was conceived by DOE and NSF to address the growing concerns about correctness among those who employ computational methods to perform large-scale scientific simulations. These concerns have escalated, given t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15640v2-abstract-full').style.display = 'inline'; document.getElementById('2312.15640v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2312.15640v2-abstract-full" style="display: none;"> This report is a digest of the DOE/NSF Workshop on Correctness in Scientific Computing (CSC'23) held on June 17, 2023, as part of the Federated Computing Research Conference (FCRC) 2023. CSC was conceived by DOE and NSF to address the growing concerns about correctness among those who employ computational methods to perform large-scale scientific simulations. These concerns have escalated, given the complexity, scale, and heterogeneity of today's HPC software and hardware. If correctness is not proactively addressed, there is the risk of producing flawed science on top of unacceptable productivity losses faced by computational scientists and engineers. HPC systems are beginning to include data-driven methods, including machine learning and surrogate models, and their impact on overall HPC system correctness was also felt urgent to discuss. Stakeholders of correctness in this space were identified to belong to several sub-disciplines of computer science; from computer architecture researchers who design special-purpose hardware that offers high energy efficiencies; numerical algorithm designers who develop efficient computational schemes based on reduced precision as well as reduced data movement; all the way to researchers in programming language and formal methods who seek methodologies for correct compilation and verification. To include attendees with such a diverse set of backgrounds, CSC was held during the Federated Computing Research Conference (FCRC) 2023. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2312.15640v2-abstract-full').style.display = 'none'; document.getElementById('2312.15640v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 December, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">36 pages. DOE/NSF Workshop on Correctness in Scientific Computing (CSC 2023) was a PLDI 2023 workshop</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> B.8.1; C.1.4; D.0.3; D.0.4; D.1.3; D.2.1; D.2.5; D.3.1; G.1.2; J.2 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.05782">arXiv:2311.05782</a> <span> [<a href="https://arxiv.org/pdf/2311.05782">pdf</a>, <a href="https://arxiv.org/format/2311.05782">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> MPGemmFI: A Fault Injection Technique for Mixed Precision GEMM in ML Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Fang%2C+B">Bo Fang</a>, <a href="/search/cs?searchtype=author&query=Li%2C+X">Xinyi Li</a>, <a href="/search/cs?searchtype=author&query=Dam%2C+H">Harvey Dam</a>, <a href="/search/cs?searchtype=author&query=Tan%2C+C">Cheng Tan</a>, <a href="/search/cs?searchtype=author&query=Hari%2C+S+K+S">Siva Kumar Sastry Hari</a>, <a href="/search/cs?searchtype=author&query=Tsai%2C+T">Timothy Tsai</a>, <a href="/search/cs?searchtype=author&query=Laguna%2C+I">Ignacio Laguna</a>, <a href="/search/cs?searchtype=author&query=Tao%2C+D">Dingwen Tao</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Nair%2C+P">Prashant Nair</a>, <a href="/search/cs?searchtype=author&query=Barker%2C+K">Kevin Barker</a>, <a href="/search/cs?searchtype=author&query=Li%2C+A">Ang Li</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.05782v1-abstract-short" style="display: inline;"> Emerging deep learning workloads urgently need fast general matrix multiplication (GEMM). To meet such demand, one of the critical features of machine-learning-specific accelerators such as NVIDIA Tensor Cores, AMD Matrix Cores, and Google TPUs is the support of mixed-precision enabled GEMM. For DNN models, lower-precision FP data formats and computation offer acceptable correctness but significan… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.05782v1-abstract-full').style.display = 'inline'; document.getElementById('2311.05782v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.05782v1-abstract-full" style="display: none;"> Emerging deep learning workloads urgently need fast general matrix multiplication (GEMM). To meet such demand, one of the critical features of machine-learning-specific accelerators such as NVIDIA Tensor Cores, AMD Matrix Cores, and Google TPUs is the support of mixed-precision enabled GEMM. For DNN models, lower-precision FP data formats and computation offer acceptable correctness but significant performance, area, and memory footprint improvement. While promising, the mixed-precision computation on error resilience remains unexplored. To this end, we develop a fault injection framework that systematically injects fault into the mixed-precision computation results. We investigate how the faults affect the accuracy of machine learning applications. Based on the error resilience characteristics, we offer lightweight error detection and correction solutions that significantly improve the overall model accuracy if the models experience hardware faults. The solutions can be efficiently integrated into the accelerator's pipelines. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.05782v1-abstract-full').style.display = 'none'; document.getElementById('2311.05782v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.15188">arXiv:2309.15188</a> <span> [<a href="https://arxiv.org/pdf/2309.15188">pdf</a>, <a href="https://arxiv.org/format/2309.15188">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.5281/zenodo.7958513">10.5281/zenodo.7958513 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> ICML 2023 Topological Deep Learning Challenge : Design and Results </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Papillon%2C+M">Mathilde Papillon</a>, <a href="/search/cs?searchtype=author&query=Hajij%2C+M">Mustafa Hajij</a>, <a href="/search/cs?searchtype=author&query=Jenne%2C+H">Helen Jenne</a>, <a href="/search/cs?searchtype=author&query=Mathe%2C+J">Johan Mathe</a>, <a href="/search/cs?searchtype=author&query=Myers%2C+A">Audun Myers</a>, <a href="/search/cs?searchtype=author&query=Papamarkou%2C+T">Theodore Papamarkou</a>, <a href="/search/cs?searchtype=author&query=Birdal%2C+T">Tolga Birdal</a>, <a href="/search/cs?searchtype=author&query=Dey%2C+T">Tamal Dey</a>, <a href="/search/cs?searchtype=author&query=Doster%2C+T">Tim Doster</a>, <a href="/search/cs?searchtype=author&query=Emerson%2C+T">Tegan Emerson</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Gurusankar Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Govil%2C+D">Devendra Govil</a>, <a href="/search/cs?searchtype=author&query=Guzm%C3%A1n-S%C3%A1enz%2C+A">Aldo Guzm谩n-S谩enz</a>, <a href="/search/cs?searchtype=author&query=Kvinge%2C+H">Henry Kvinge</a>, <a href="/search/cs?searchtype=author&query=Livesay%2C+N">Neal Livesay</a>, <a href="/search/cs?searchtype=author&query=Mukherjee%2C+S">Soham Mukherjee</a>, <a href="/search/cs?searchtype=author&query=Samaga%2C+S+N">Shreyas N. Samaga</a>, <a href="/search/cs?searchtype=author&query=Ramamurthy%2C+K+N">Karthikeyan Natesan Ramamurthy</a>, <a href="/search/cs?searchtype=author&query=Karri%2C+M+R">Maneel Reddy Karri</a>, <a href="/search/cs?searchtype=author&query=Rosen%2C+P">Paul Rosen</a>, <a href="/search/cs?searchtype=author&query=Sanborn%2C+S">Sophia Sanborn</a>, <a href="/search/cs?searchtype=author&query=Walters%2C+R">Robin Walters</a>, <a href="/search/cs?searchtype=author&query=Agerberg%2C+J">Jens Agerberg</a>, <a href="/search/cs?searchtype=author&query=Barikbin%2C+S">Sadrodin Barikbin</a>, <a href="/search/cs?searchtype=author&query=Battiloro%2C+C">Claudio Battiloro</a> , et al. (31 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.15188v4-abstract-short" style="display: inline;"> This paper presents the computational challenge on topological deep learning that was hosted within the ICML 2023 Workshop on Topology and Geometry in Machine Learning. The competition asked participants to provide open-source implementations of topological neural networks from the literature by contributing to the python packages TopoNetX (data processing) and TopoModelX (deep learning). The chal… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.15188v4-abstract-full').style.display = 'inline'; document.getElementById('2309.15188v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.15188v4-abstract-full" style="display: none;"> This paper presents the computational challenge on topological deep learning that was hosted within the ICML 2023 Workshop on Topology and Geometry in Machine Learning. The competition asked participants to provide open-source implementations of topological neural networks from the literature by contributing to the python packages TopoNetX (data processing) and TopoModelX (deep learning). The challenge attracted twenty-eight qualifying submissions in its two-month duration. This paper describes the design of the challenge and summarizes its main findings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.15188v4-abstract-full').style.display = 'none'; document.getElementById('2309.15188v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2306.06238">arXiv:2306.06238</a> <span> [<a href="https://arxiv.org/pdf/2306.06238">pdf</a>, <a href="https://arxiv.org/format/2306.06238">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> Understanding the Effect of the Long Tail on Neural Network Compression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dam%2C+H">Harvey Dam</a>, <a href="/search/cs?searchtype=author&query=Joseph%2C+V">Vinu Joseph</a>, <a href="/search/cs?searchtype=author&query=Bhaskara%2C+A">Aditya Bhaskara</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Muralidharan%2C+S">Saurav Muralidharan</a>, <a href="/search/cs?searchtype=author&query=Garland%2C+M">Michael Garland</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2306.06238v3-abstract-short" style="display: inline;"> Network compression is now a mature sub-field of neural network research: over the last decade, significant progress has been made towards reducing the size of models and speeding up inference, while maintaining the classification accuracy. However, many works have observed that focusing on just the overall accuracy can be misguided. E.g., it has been shown that mismatches between the full and com… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06238v3-abstract-full').style.display = 'inline'; document.getElementById('2306.06238v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2306.06238v3-abstract-full" style="display: none;"> Network compression is now a mature sub-field of neural network research: over the last decade, significant progress has been made towards reducing the size of models and speeding up inference, while maintaining the classification accuracy. However, many works have observed that focusing on just the overall accuracy can be misguided. E.g., it has been shown that mismatches between the full and compressed models can be biased towards under-represented classes. This raises the important research question, can we achieve network compression while maintaining "semantic equivalence" with the original network? In this work, we study this question in the context of the "long tail" phenomenon in computer vision datasets observed by Feldman, et al. They argue that memorization of certain inputs (appropriately defined) is essential to achieving good generalization. As compression limits the capacity of a network (and hence also its ability to memorize), we study the question: are mismatches between the full and compressed models correlated with the memorized training data? We present positive evidence in this direction for image classification tasks, by considering different base architectures and compression schemes. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2306.06238v3-abstract-full').style.display = 'none'; document.getElementById('2306.06238v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 June, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.06407">arXiv:2110.06407</a> <span> [<a href="https://arxiv.org/pdf/2110.06407">pdf</a>, <a href="https://arxiv.org/ps/2110.06407">ps</a>, <a href="https://arxiv.org/format/2110.06407">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Multiagent Systems">cs.MA</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Efficient Linearizability Checking for Actor-based Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Al-Mahfoudh%2C+M+S">Mohammed S. Al-Mahfoudh</a>, <a href="/search/cs?searchtype=author&query=Stutsman%2C+R">Ryan Stutsman</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.06407v2-abstract-short" style="display: inline;"> Recent demand for distributed software had led to a surge in popularity in actor-based frameworks. However, even with the stylized message passing model of actors, writing correct distributed software is still difficult. We present our work on linearizability checking in DS2, an integrated framework for specifying, synthesizing, and testing distributed actor systems. The key insight of our approac… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.06407v2-abstract-full').style.display = 'inline'; document.getElementById('2110.06407v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.06407v2-abstract-full" style="display: none;"> Recent demand for distributed software had led to a surge in popularity in actor-based frameworks. However, even with the stylized message passing model of actors, writing correct distributed software is still difficult. We present our work on linearizability checking in DS2, an integrated framework for specifying, synthesizing, and testing distributed actor systems. The key insight of our approach is that often subcomponents of distributed actor systems represent common algorithms or data structures (e.g.\ a distributed hash table or tree) that can be validated against a simple sequential model of the system. This makes it easy for developers to validate their concurrent actor systems without complex specifications. DS2 automatically explores the concurrent schedules that system could arrive at, and it compares observed output of the system to ensure it is equivalent to what the sequential implementation could have produced. We describe DS2's linearizability checking and test it on several concurrent replication algorithms from the literature. We explore in detail how different algorithms for enumerating the model schedule space fare in finding bugs in actor systems, and we present our own refinements on algorithms for exploring actor system schedules that we show are effective in finding bugs. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.06407v2-abstract-full').style.display = 'none'; document.getElementById('2110.06407v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This article was submitted around Feb 2021 to the journal of "Software Practice and Experience" and not yet finished the review process. So they allow us to submit it to one more personal archival service e.g. this one. That is, this is unpublished work yet. The updated version is accepted in Software Practice and Experience (SPE) Journal as of July 15, 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2109.09265">arXiv:2109.09265</a> <span> [<a href="https://arxiv.org/pdf/2109.09265">pdf</a>, <a href="https://arxiv.org/format/2109.09265">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mathematical Software">cs.MS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> </div> <p class="title is-5 mathjax"> Merlion: A Machine Learning Library for Time Series </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bhatnagar%2C+A">Aadyot Bhatnagar</a>, <a href="/search/cs?searchtype=author&query=Kassianik%2C+P">Paul Kassianik</a>, <a href="/search/cs?searchtype=author&query=Liu%2C+C">Chenghao Liu</a>, <a href="/search/cs?searchtype=author&query=Lan%2C+T">Tian Lan</a>, <a href="/search/cs?searchtype=author&query=Yang%2C+W">Wenzhuo Yang</a>, <a href="/search/cs?searchtype=author&query=Cassius%2C+R">Rowan Cassius</a>, <a href="/search/cs?searchtype=author&query=Sahoo%2C+D">Doyen Sahoo</a>, <a href="/search/cs?searchtype=author&query=Arpit%2C+D">Devansh Arpit</a>, <a href="/search/cs?searchtype=author&query=Subramanian%2C+S">Sri Subramanian</a>, <a href="/search/cs?searchtype=author&query=Woo%2C+G">Gerald Woo</a>, <a href="/search/cs?searchtype=author&query=Saha%2C+A">Amrita Saha</a>, <a href="/search/cs?searchtype=author&query=Jagota%2C+A+K">Arun Kumar Jagota</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Gokulakrishnan Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Singh%2C+M">Manpreet Singh</a>, <a href="/search/cs?searchtype=author&query=Krithika%2C+K+C">K C Krithika</a>, <a href="/search/cs?searchtype=author&query=Maddineni%2C+S">Sukumar Maddineni</a>, <a href="/search/cs?searchtype=author&query=Cho%2C+D">Daeki Cho</a>, <a href="/search/cs?searchtype=author&query=Zong%2C+B">Bo Zong</a>, <a href="/search/cs?searchtype=author&query=Zhou%2C+Y">Yingbo Zhou</a>, <a href="/search/cs?searchtype=author&query=Xiong%2C+C">Caiming Xiong</a>, <a href="/search/cs?searchtype=author&query=Savarese%2C+S">Silvio Savarese</a>, <a href="/search/cs?searchtype=author&query=Hoi%2C+S">Steven Hoi</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+H">Huan Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2109.09265v1-abstract-short" style="display: inline;"> We introduce Merlion, an open-source machine learning library for time series. It features a unified interface for many commonly used models and datasets for anomaly detection and forecasting on both univariate and multivariate time series, along with standard pre/post-processing layers. It has several modules to improve ease-of-use, including visualization, anomaly score calibration to improve in… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.09265v1-abstract-full').style.display = 'inline'; document.getElementById('2109.09265v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2109.09265v1-abstract-full" style="display: none;"> We introduce Merlion, an open-source machine learning library for time series. It features a unified interface for many commonly used models and datasets for anomaly detection and forecasting on both univariate and multivariate time series, along with standard pre/post-processing layers. It has several modules to improve ease-of-use, including visualization, anomaly score calibration to improve interpetability, AutoML for hyperparameter tuning and model selection, and model ensembling. Merlion also provides a unique evaluation framework that simulates the live deployment and re-training of a model in production. This library aims to provide engineers and researchers a one-stop solution to rapidly develop models for their specific time series needs and benchmark them across multiple time series datasets. In this technical report, we highlight Merlion's architecture and major functionalities, and we report benchmark numbers across different baseline models and ensembles. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2109.09265v1-abstract-full').style.display = 'none'; document.getElementById('2109.09265v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">22 pages, 1 figure, 14 tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.11064">arXiv:2105.11064</a> <span> [<a href="https://arxiv.org/pdf/2105.11064">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Automated Dynamic Concurrency Analysis for Go </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Taheri%2C+S">Saeed Taheri</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.11064v1-abstract-short" style="display: inline;"> The concurrency features of the Go language have proven versatile in the development of a number of concurrency systems. However, correctness methods to address challenges in Go concurrency debugging have not received much attention. In this work, we present an automatic dynamic tracing mechanism that efficiently captures and helps analyze the whole-program concurrency model. Using an enhancement… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.11064v1-abstract-full').style.display = 'inline'; document.getElementById('2105.11064v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.11064v1-abstract-full" style="display: none;"> The concurrency features of the Go language have proven versatile in the development of a number of concurrency systems. However, correctness methods to address challenges in Go concurrency debugging have not received much attention. In this work, we present an automatic dynamic tracing mechanism that efficiently captures and helps analyze the whole-program concurrency model. Using an enhancement to the built-in tracer package of Go and a framework that collects dynamic traces from application execution, we enable thorough post-mortem analysis for concurrency debugging. Preliminary results about the effectiveness and scalability (up to more than 2K goroutines) of our proposed dynamic tracing for concurrent debugging are presented. We discuss the future direction for exploiting dynamic tracing towards accelerating concurrent bug exposure. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.11064v1-abstract-full').style.display = 'none'; document.getElementById('2105.11064v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.01604">arXiv:2012.01604</a> <span> [<a href="https://arxiv.org/pdf/2012.01604">pdf</a>, <a href="https://arxiv.org/format/2012.01604">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Going Beyond Classification Accuracy Metrics in Model Compression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Joseph%2C+V">Vinu Joseph</a>, <a href="/search/cs?searchtype=author&query=Siddiqui%2C+S+A">Shoaib Ahmed Siddiqui</a>, <a href="/search/cs?searchtype=author&query=Bhaskara%2C+A">Aditya Bhaskara</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Muralidharan%2C+S">Saurav Muralidharan</a>, <a href="/search/cs?searchtype=author&query=Garland%2C+M">Michael Garland</a>, <a href="/search/cs?searchtype=author&query=Ahmed%2C+S">Sheraz Ahmed</a>, <a href="/search/cs?searchtype=author&query=Dengel%2C+A">Andreas Dengel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.01604v2-abstract-short" style="display: inline;"> With the rise in edge-computing devices, there has been an increasing demand to deploy energy and resource-efficient models. A large body of research has been devoted to developing methods that can reduce the size of the model considerably without affecting the standard metrics such as top-1 accuracy. However, these pruning approaches tend to result in a significant mismatch in other metrics such… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.01604v2-abstract-full').style.display = 'inline'; document.getElementById('2012.01604v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.01604v2-abstract-full" style="display: none;"> With the rise in edge-computing devices, there has been an increasing demand to deploy energy and resource-efficient models. A large body of research has been devoted to developing methods that can reduce the size of the model considerably without affecting the standard metrics such as top-1 accuracy. However, these pruning approaches tend to result in a significant mismatch in other metrics such as fairness across classes and explainability. To combat such misalignment, we propose a novel multi-part loss function inspired by the knowledge-distillation literature. Through extensive experiments, we demonstrate the effectiveness of our approach across different compression algorithms, architectures, tasks as well as datasets. In particular, we obtain up to $4.1\times$ reduction in the number of prediction mismatches between the compressed and reference models, and up to $5.7\times$ in cases where the reference model makes the correct prediction; all while making no changes to the compression algorithm, and minor modifications to the loss function. Furthermore, we demonstrate how inducing simple alignment between the predictions of the models naturally improves the alignment on other metrics including fairness and attributions. Our framework can thus serve as a simple plug-and-play component for compression algorithms in the future. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.01604v2-abstract-full').style.display = 'none'; document.getElementById('2012.01604v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 14 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 2 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.11960">arXiv:2004.11960</a> <span> [<a href="https://arxiv.org/pdf/2004.11960">pdf</a>, <a href="https://arxiv.org/format/2004.11960">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Symbolic Computation">cs.SC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> An Abstraction-guided Approach to Scalable and Rigorous Floating-Point Error Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Das%2C+A">Arnab Das</a>, <a href="/search/cs?searchtype=author&query=Briggs%2C+I">Ian Briggs</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Panchekha%2C+P">Pavel Panchekha</a>, <a href="/search/cs?searchtype=author&query=Krishnamoorthy%2C+S">Sriram Krishnamoorthy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.11960v3-abstract-short" style="display: inline;"> Automated techniques for rigorous floating-point round-off error analysis are important in areas including formal verification of correctness and precision tuning. Existing tools and techniques, while providing tight bounds, fail to analyze expressions with more than a few hundred operators, thus unable to cover important practical problems. In this work, we present Satire, a new tool that sheds l… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.11960v3-abstract-full').style.display = 'inline'; document.getElementById('2004.11960v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.11960v3-abstract-full" style="display: none;"> Automated techniques for rigorous floating-point round-off error analysis are important in areas including formal verification of correctness and precision tuning. Existing tools and techniques, while providing tight bounds, fail to analyze expressions with more than a few hundred operators, thus unable to cover important practical problems. In this work, we present Satire, a new tool that sheds light on how scalability and bound-tightness can be attained through a combination of incremental analysis, abstraction, and judicious use of concrete and symbolic evaluation. Satire has handled problems exceeding 200K operators. We present Satire's underlying error analysis approach, information-theoretic abstraction heuristics, and a wide range of case studies, with evaluation covering FFT, Lorenz system of equations, and various PDE stencil types. Our results demonstrate the tightness of Satire's bounds, its acceptable runtime, and valuable insights provided. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.11960v3-abstract-full').style.display = 'none'; document.getElementById('2004.11960v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">A more informative and updated version of this paper has been accepted for publication at SuperComputing 2020</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2004.04359">arXiv:2004.04359</a> <span> [<a href="https://arxiv.org/pdf/2004.04359">pdf</a>, <a href="https://arxiv.org/format/2004.04359">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Numerical Analysis">math.NA</span> </div> </div> <p class="title is-5 mathjax"> FPDetect: Efficient Reasoning About Stencil Programs Using Selective Direct Evaluation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Das%2C+A">Arnab Das</a>, <a href="/search/cs?searchtype=author&query=Krishnamoorthy%2C+S">Sriram Krishnamoorthy</a>, <a href="/search/cs?searchtype=author&query=Briggs%2C+I">Ian Briggs</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Tipireddy%2C+R">Ramakrishna Tipireddy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2004.04359v4-abstract-short" style="display: inline;"> We present FPDetect, a low overhead approach for detecting logical errors and soft errors affecting stencil computations without generating false positives. We develop an offline analysis that tightly estimates the number of floating-point bits preserved across stencil applications. This estimate rigorously bounds the values expected in the data space of the computation. Violations of this bound c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.04359v4-abstract-full').style.display = 'inline'; document.getElementById('2004.04359v4-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2004.04359v4-abstract-full" style="display: none;"> We present FPDetect, a low overhead approach for detecting logical errors and soft errors affecting stencil computations without generating false positives. We develop an offline analysis that tightly estimates the number of floating-point bits preserved across stencil applications. This estimate rigorously bounds the values expected in the data space of the computation. Violations of this bound can be attributed with certainty to errors. FPDetect helps synthesize error detectors customized for user-specified levels of accuracy and coverage. FPDetect also enables overhead reduction techniques based on deploying these detectors coarsely in space and time. Experimental evaluations demonstrate the practicality of our approach. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2004.04359v4-abstract-full').style.display = 'none'; document.getElementById('2004.04359v4-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 August, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 April, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in Journal of ACM Transactions on Architecture and Code Optimization</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.02497">arXiv:1911.02497</a> <span> [<a href="https://arxiv.org/pdf/1911.02497">pdf</a>, <a href="https://arxiv.org/format/1911.02497">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">stat.ML</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/MM.2020.3012391">10.1109/MM.2020.3012391 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Programmable Approach to Neural Network Compression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Joseph%2C+V">Vinu Joseph</a>, <a href="/search/cs?searchtype=author&query=Muralidharan%2C+S">Saurav Muralidharan</a>, <a href="/search/cs?searchtype=author&query=Garg%2C+A">Animesh Garg</a>, <a href="/search/cs?searchtype=author&query=Garland%2C+M">Michael Garland</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1911.02497v2-abstract-short" style="display: inline;"> Deep neural networks (DNNs) frequently contain far more weights, represented at a higher precision, than are required for the specific task which they are trained to perform. Consequently, they can often be compressed using techniques such as weight pruning and quantization that reduce both the model size and inference time without appreciable loss in accuracy. However, finding the best compressio… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.02497v2-abstract-full').style.display = 'inline'; document.getElementById('1911.02497v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1911.02497v2-abstract-full" style="display: none;"> Deep neural networks (DNNs) frequently contain far more weights, represented at a higher precision, than are required for the specific task which they are trained to perform. Consequently, they can often be compressed using techniques such as weight pruning and quantization that reduce both the model size and inference time without appreciable loss in accuracy. However, finding the best compression strategy and corresponding target sparsity for a given DNN, hardware platform, and optimization objective currently requires expensive, frequently manual, trial-and-error experimentation. In this paper, we introduce a programmable system for model compression called Condensa. Users programmatically compose simple operators, in Python, to build more complex and practically interesting compression strategies. Given a strategy and user-provided objective (such as minimization of running time), Condensa uses a novel Bayesian optimization-based algorithm to automatically infer desirable sparsities. Our experiments on four real-world DNNs demonstrate memory footprint and hardware runtime throughput improvements of 188x and 2.59x, respectively, using at most ten samples per search. We have released a reference implementation of Condensa at https://github.com/NVlabs/condensa. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.02497v2-abstract-full').style.display = 'none'; document.getElementById('1911.02497v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This is an updated version of a paper published in IEEE Micro, vol. 40, no. 5, pp. 17-25, Sept.-Oct. 2020 at https://ieeexplore.ieee.org/document/9151283</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Micro, Volume: 40, Issue: 5, Sept.-Oct. 2020, pp. 17-25 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.11469">arXiv:1909.11469</a> <span> [<a href="https://arxiv.org/pdf/1909.11469">pdf</a>, <a href="https://arxiv.org/format/1909.11469">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> Message Scheduling for Performant, Many-Core Belief Propagation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Van+der+Merwe%2C+M">Mark Van der Merwe</a>, <a href="/search/cs?searchtype=author&query=Joseph%2C+V">Vinu Joseph</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.11469v1-abstract-short" style="display: inline;"> Belief Propagation (BP) is a message-passing algorithm for approximate inference over Probabilistic Graphical Models (PGMs), finding many applications such as computer vision, error-correcting codes, and protein-folding. While general, the convergence and speed of the algorithm has limited its practical use on difficult inference problems. As an algorithm that is highly amenable to parallelization… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.11469v1-abstract-full').style.display = 'inline'; document.getElementById('1909.11469v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.11469v1-abstract-full" style="display: none;"> Belief Propagation (BP) is a message-passing algorithm for approximate inference over Probabilistic Graphical Models (PGMs), finding many applications such as computer vision, error-correcting codes, and protein-folding. While general, the convergence and speed of the algorithm has limited its practical use on difficult inference problems. As an algorithm that is highly amenable to parallelization, many-core Graphical Processing Units (GPUs) could significantly improve BP performance. Improving BP through many-core systems is non-trivial: the scheduling of messages in the algorithm strongly affects performance. We present a study of message scheduling for BP on GPUs. We demonstrate that BP exhibits a tradeoff between speed and convergence based on parallelism and show that existing message schedulings are not able to utilize this tradeoff. To this end, we present a novel randomized message scheduling approach, Randomized BP (RnBP), which outperforms existing methods on the GPU. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.11469v1-abstract-full').style.display = 'none'; document.getElementById('1909.11469v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1811.05618">arXiv:1811.05618</a> <span> [<a href="https://arxiv.org/pdf/1811.05618">pdf</a>, <a href="https://arxiv.org/format/1811.05618">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3307681.3325960">10.1145/3307681.3325960 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Multi-level analysis of compiler induced variability and performance tradeoffs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Bentley%2C+M">Michael Bentley</a>, <a href="/search/cs?searchtype=author&query=Briggs%2C+I">Ian Briggs</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Ahn%2C+D+H">Dong H. Ahn</a>, <a href="/search/cs?searchtype=author&query=Laguna%2C+I">Ignacio Laguna</a>, <a href="/search/cs?searchtype=author&query=Lee%2C+G+L">Gregory L. Lee</a>, <a href="/search/cs?searchtype=author&query=Jones%2C+H+E">Holger E. Jones</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1811.05618v2-abstract-short" style="display: inline;"> Successful HPC software applications are long-lived. When ported across machines and their compilers, these applications often produce different numerical results, many of which are unacceptable. Such variability is also a concern while optimizing the code more aggressively to gain performance. Efficient tools that help locate the program units (files and functions) within which most of the variab… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.05618v2-abstract-full').style.display = 'inline'; document.getElementById('1811.05618v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1811.05618v2-abstract-full" style="display: none;"> Successful HPC software applications are long-lived. When ported across machines and their compilers, these applications often produce different numerical results, many of which are unacceptable. Such variability is also a concern while optimizing the code more aggressively to gain performance. Efficient tools that help locate the program units (files and functions) within which most of the variability occurs are badly needed, both to plan for code ports and to root-cause errors due to variability when they happen in the field. In this work, we offer an enhanced version of the open-source testing framework FLiT to serve these roles. Key new features of FLiT include a suite of bisection algorithms that help locate the root causes of variability. Another added feature allows an analysis of the tradeoffs between performance and the degree of variability. Our new contributions also include a collection of case studies. Results on the MFEM finite-element library include variability/performance tradeoffs, and the identification of a (hitherto unknown) abnormal level of result-variability even under mild compiler optimizations. Results from studying the Laghos proxy application include identifying a significantly divergent floating-point result-variability and successful root-causing down to the problematic function over as little as 14 program executions. Finally, in an evaluation of 4,376 controlled injections of floating-point perturbations on the LULESH proxy application, we showed that the FLiT framework has 100 precision and recall in discovering the file and function locations of the injections all within an average of only 15 program executions. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1811.05618v2-abstract-full').style.display = 'none'; document.getElementById('1811.05618v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 November, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 11 figures, accepted in HPDC 2019</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> LLNL-CONF-759867 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1807.10852">arXiv:1807.10852</a> <span> [<a href="https://arxiv.org/pdf/1807.10852">pdf</a>, <a href="https://arxiv.org/format/1807.10852">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> Sparse Matrix Code Dependence Analysis Simplification at Compile Time </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mohammadi%2C+M+S">Mahdi Soltan Mohammadi</a>, <a href="/search/cs?searchtype=author&query=Cheshmi%2C+K">Kazem Cheshmi</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Hall%2C+M">Mary Hall</a>, <a href="/search/cs?searchtype=author&query=Dehnavi%2C+M+M">Maryam Mehri Dehnavi</a>, <a href="/search/cs?searchtype=author&query=Venkat%2C+A">Anand Venkat</a>, <a href="/search/cs?searchtype=author&query=Yuki%2C+T">Tomofumi Yuki</a>, <a href="/search/cs?searchtype=author&query=Strout%2C+M+M">Michelle Mills Strout</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1807.10852v1-abstract-short" style="display: inline;"> Analyzing array-based computations to determine data dependences is useful for many applications including automatic parallelization, race detection, computation and communication overlap, verification, and shape analysis. For sparse matrix codes, array data dependence analysis is made more difficult by the use of index arrays that make it possible to store only the nonzero entries of the matrix (… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1807.10852v1-abstract-full').style.display = 'inline'; document.getElementById('1807.10852v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1807.10852v1-abstract-full" style="display: none;"> Analyzing array-based computations to determine data dependences is useful for many applications including automatic parallelization, race detection, computation and communication overlap, verification, and shape analysis. For sparse matrix codes, array data dependence analysis is made more difficult by the use of index arrays that make it possible to store only the nonzero entries of the matrix (e.g., in A[B[i]], B is an index array). Here, dependence analysis is often stymied by such indirect array accesses due to the values of the index array not being available at compile time. Consequently, many dependences cannot be proven unsatisfiable or determined until runtime. Nonetheless, index arrays in sparse matrix codes often have properties such as monotonicity of index array elements that can be exploited to reduce the amount of runtime analysis needed. In this paper, we contribute a formulation of array data dependence analysis that includes encoding index array properties as universally quantified constraints. This makes it possible to leverage existing SMT solvers to determine whether such dependences are unsatisfiable and significantly reduces the number of dependences that require runtime analysis in a set of eight sparse matrix kernels. Another contribution is an algorithm for simplifying the remaining satisfiable data dependences by discovering equalities and/or subset relationships. These simplifications are essential to make a runtime-inspection-based approach feasible. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1807.10852v1-abstract-full').style.display = 'none'; document.getElementById('1807.10852v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 July, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2018. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1709.04551">arXiv:1709.04551</a> <span> [<a href="https://arxiv.org/pdf/1709.04551">pdf</a>, <a href="https://arxiv.org/ps/1709.04551">ps</a>, <a href="https://arxiv.org/format/1709.04551">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> An Operational Semantic Basis for OpenMP Race Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Atzeni%2C+S">Simone Atzeni</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1709.04551v1-abstract-short" style="display: inline;"> OpenMP is the de facto standard to exploit the on-node parallelism in new generation supercomputers.Despite its overall ease of use, even expert users are known to create OpenMP programs that harbor concurrency errors, of which one of the most insidious of errors are {\em data races}.OpenMP is also a rapidly evolving standard, which means that future data races may be introduced within unfamiliar… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1709.04551v1-abstract-full').style.display = 'inline'; document.getElementById('1709.04551v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1709.04551v1-abstract-full" style="display: none;"> OpenMP is the de facto standard to exploit the on-node parallelism in new generation supercomputers.Despite its overall ease of use, even expert users are known to create OpenMP programs that harbor concurrency errors, of which one of the most insidious of errors are {\em data races}.OpenMP is also a rapidly evolving standard, which means that future data races may be introduced within unfamiliar contexts.A simple and rigorous operational semantics for OpenMP can help build reliable race checkers and ward off future errors through programmer education and better tooling.This paper's key contribution is a simple operational semantics for OpenMP, with primitive events matching those generated by today's popular OpenMP runtimes and tracing methods such as OMPT.This makes our operational semantics more than a theoretical document for intellectual edification; it can serve as a blueprint for OpenMP event capture and tool building.We back this statement by summarizing the workings of a new data race checker for OpenMP being built based on this semantics.The larger purpose served by our semantics is to serve the needs of the OpenMP community with regard to their contemplated extensions to OpenMP, as well as future tooling efforts. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1709.04551v1-abstract-full').style.display = 'none'; document.getElementById('1709.04551v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 September, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 3 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1705.07478">arXiv:1705.07478</a> <span> [<a href="https://arxiv.org/pdf/1705.07478">pdf</a>, <a href="https://arxiv.org/format/1705.07478">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Report of the HPC Correctness Summit, Jan 25--26, 2017, Washington, DC </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Hovland%2C+P+D">Paul D. Hovland</a>, <a href="/search/cs?searchtype=author&query=Iancu%2C+C">Costin Iancu</a>, <a href="/search/cs?searchtype=author&query=Krishnamoorthy%2C+S">Sriram Krishnamoorthy</a>, <a href="/search/cs?searchtype=author&query=Laguna%2C+I">Ignacio Laguna</a>, <a href="/search/cs?searchtype=author&query=Lethin%2C+R+A">Richard A. Lethin</a>, <a href="/search/cs?searchtype=author&query=Sen%2C+K">Koushik Sen</a>, <a href="/search/cs?searchtype=author&query=Siegel%2C+S+F">Stephen F. Siegel</a>, <a href="/search/cs?searchtype=author&query=Solar-Lezama%2C+A">Armando Solar-Lezama</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1705.07478v1-abstract-short" style="display: inline;"> Maintaining leadership in HPC requires the ability to support simulations at large scales and fidelity. In this study, we detail one of the most significant productivity challenges in achieving this goal, namely the increasing proclivity to bugs, especially in the face of growing hardware and software heterogeneity and sheer system scale. We identify key areas where timely new research must be pro… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1705.07478v1-abstract-full').style.display = 'inline'; document.getElementById('1705.07478v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1705.07478v1-abstract-full" style="display: none;"> Maintaining leadership in HPC requires the ability to support simulations at large scales and fidelity. In this study, we detail one of the most significant productivity challenges in achieving this goal, namely the increasing proclivity to bugs, especially in the face of growing hardware and software heterogeneity and sheer system scale. We identify key areas where timely new research must be proactively begun to address these challenges, and create new correctness tools that must ideally play a significant role even while ramping up toward exacale. We close with the proposal for a two-day workshop in which the problems identified in this report can be more broadly discussed, and specific plans to launch these new research thrusts identified. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1705.07478v1-abstract-full').style.display = 'none'; document.getElementById('1705.07478v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 May, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">57 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1606.08948">arXiv:1606.08948</a> <span> [<a href="https://arxiv.org/pdf/1606.08948">pdf</a>, <a href="https://arxiv.org/format/1606.08948">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> PRESAGE: Protecting Structured Address Generation against Soft Errors </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Sharma%2C+V+C">Vishal Chandra Sharma</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Ganesh Gopalakrishnan</a>, <a href="/search/cs?searchtype=author&query=Krishnamoorthy%2C+S">Sriram Krishnamoorthy</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1606.08948v1-abstract-short" style="display: inline;"> Modern computer scaling trends in pursuit of larger component counts and power efficiency have, unfortunately, lead to less reliable hardware and consequently soft errors escaping into application data ("silent data corruptions"). Techniques to enhance system resilience hinge on the availability of efficient error detectors that have high detection rates, low false positive rates, and lower comput… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1606.08948v1-abstract-full').style.display = 'inline'; document.getElementById('1606.08948v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1606.08948v1-abstract-full" style="display: none;"> Modern computer scaling trends in pursuit of larger component counts and power efficiency have, unfortunately, lead to less reliable hardware and consequently soft errors escaping into application data ("silent data corruptions"). Techniques to enhance system resilience hinge on the availability of efficient error detectors that have high detection rates, low false positive rates, and lower computational overhead. Unfortunately, efficient detectors to detect faults during address generation (to index large arrays) have not been widely researched. We present a novel lightweight compiler-driven technique called PRESAGE for detecting bit-flips affecting structured address computations. A key insight underlying PRESAGE is that any address computation scheme that flows an already incurred error is better than a scheme that corrupts one particular array access but otherwise (falsely) appears to compute perfectly. Enabling the flow of errors allows one to situate detectors at loop exit points, and helps turn silent corruptions into easily detectable error situations. Our experiments using PolyBench benchmark suite indicate that PRESAGE-based error detectors have a high error-detection rate while incurring low overheads. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1606.08948v1-abstract-full').style.display = 'none'; document.getElementById('1606.08948v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 June, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2016. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1211.7356">arXiv:1211.7356</a> <span> [<a href="https://arxiv.org/pdf/1211.7356">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Networking and Internet Architecture">cs.NI</span> </div> </div> <p class="title is-5 mathjax"> WiGig and IEEE 802.11ad - For multi-gigabyte-per-second WPAN and WLAN </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=N.%2C+S+S">Sai Shankar N.</a>, <a href="/search/cs?searchtype=author&query=Dash%2C+D">Debashis Dash</a>, <a href="/search/cs?searchtype=author&query=Madi%2C+H+E">Hassan El Madi</a>, <a href="/search/cs?searchtype=author&query=Gopalakrishnan%2C+G">Guru Gopalakrishnan</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1211.7356v1-abstract-short" style="display: inline;"> The Wireless Gigabit Alliance (WiGig) and IEEE 802.11ad are developing a multigigabit wireless personal and local area network (WPAN/WLAN) specification in the 60 GHz millimeter wave band. Chipset manufacturers, original equipment manufacturers (OEMs), and telecom companies are also assisting in this development. 60 GHz millimeter wave transmission will scale the speed of WLANs and WPANs to 6.75 G… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1211.7356v1-abstract-full').style.display = 'inline'; document.getElementById('1211.7356v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1211.7356v1-abstract-full" style="display: none;"> The Wireless Gigabit Alliance (WiGig) and IEEE 802.11ad are developing a multigigabit wireless personal and local area network (WPAN/WLAN) specification in the 60 GHz millimeter wave band. Chipset manufacturers, original equipment manufacturers (OEMs), and telecom companies are also assisting in this development. 60 GHz millimeter wave transmission will scale the speed of WLANs and WPANs to 6.75 Gbit/s over distances less than 10 meters. This technology is the first of its kind and will eliminate the need for cable around personal computers, docking stations, and other consumer electronic devices. High-definition multimedia interface (HDMI), display port, USB 3.0, and peripheral component interconnect express (PCIe) 3.0 cables will all be eliminated. Fast downloads and uploads, wireless sync, and multi-gigabit-per-second WLANs will be possible over shorter distances. 60 GHz millimeter wave supports fast session transfer (FST) protocol, which makes it backward compatible with 5 GHz or 2.4 GHz WLAN so that end users experience the same range as in today's WLANs. IEEE 802.11ad specifies the physical (PHY) sublayer and medium access control (MAC) sublayer of the protocol stack. The MAC protocol is based on time-division multiple access (TDMA), and the PHY layer uses single carrier (SC) and orthogonal frequency division multiplexing (OFDM) to simultaneously enable low-power, high-performance applications. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1211.7356v1-abstract-full').style.display = 'none'; document.getElementById('1211.7356v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 November, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2012. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">24 pages, 18 figures. Submitted to ZTE Communications 2012</span> </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>