CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;28 of 28 results for author: <span class="mathjax">Thakur, R</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Thakur%2C+R">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Thakur, R"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Thakur%2C+R&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Thakur, R"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.14434">arXiv:2408.14434</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.14434">pdf</a>, <a href="https://arxiv.org/format/2408.14434">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Employing Artificial Intelligence to Steer Exascale Workflows with Colmena </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ward%2C+L">Logan Ward</a>, <a href="/search/cs?searchtype=author&amp;query=Pauloski%2C+J+G">J. 
Gregory Pauloski</a>, <a href="/search/cs?searchtype=author&amp;query=Hayot-Sasson%2C+V">Valerie Hayot-Sasson</a>, <a href="/search/cs?searchtype=author&amp;query=Babuji%2C+Y">Yadu Babuji</a>, <a href="/search/cs?searchtype=author&amp;query=Brace%2C+A">Alexander Brace</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+R">Ryan Chard</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+K">Kyle Chard</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Foster%2C+I">Ian Foster</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.14434v1-abstract-short" style="display: inline;"> Computational workflows are a common class of application on supercomputers, yet the loosely coupled and heterogeneous nature of workflows often fails to take full advantage of their capabilities. We created Colmena to leverage the massive parallelism of a supercomputer by using Artificial Intelligence (AI) to learn from and adapt a workflow as it executes. Colmena allows scientists to define how&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14434v1-abstract-full').style.display = 'inline'; document.getElementById('2408.14434v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.14434v1-abstract-full" style="display: none;"> Computational workflows are a common class of application on supercomputers, yet the loosely coupled and heterogeneous nature of workflows often fails to take full advantage of their capabilities. We created Colmena to leverage the massive parallelism of a supercomputer by using Artificial Intelligence (AI) to learn from and adapt a workflow as it executes. 
Colmena allows scientists to define how their application should respond to events (e.g., task completion) as a series of cooperative agents. In this paper, we describe the design of Colmena, the challenges we overcame while deploying applications on exascale systems, and the science workflows we have enhanced through interweaving AI. The scaling challenges we discuss include developing steering strategies that maximize node utilization, introducing data fabrics that reduce communication overhead of data-intensive tasks, and implementing workflow tasks that cache costly operations between invocations. These innovations coupled with a variety of application patterns accessible through our agent-based steering model have enabled science advances in chemistry, biophysics, and materials science using different types of AI. Our vision is that Colmena will spur creative solutions that harness AI across many domains of scientific computing. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.14434v1-abstract-full').style.display = 'none'; document.getElementById('2408.14434v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.13807">arXiv:2405.13807</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2405.13807">pdf</a>, <a href="https://arxiv.org/format/2405.13807">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> MPI Progress For All </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+H">Hui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Latham%2C+R">Robert Latham</a>, <a href="/search/cs?searchtype=author&amp;query=Raffenetti%2C+K">Ken Raffenetti</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yanfei Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.13807v2-abstract-short" style="display: inline;"> The progression of communication in the Message Passing Interface (MPI) is not well defined, yet it is critical for application performance, particularly in achieving effective computation and communication overlap. The opaque nature of MPI progress poses significant challenges in advancing MPI within modern high-performance computing (HPC) practices. 
Firstly, the lack of clarity hinders the devel&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13807v2-abstract-full').style.display = 'inline'; document.getElementById('2405.13807v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.13807v2-abstract-full" style="display: none;"> The progression of communication in the Message Passing Interface (MPI) is not well defined, yet it is critical for application performance, particularly in achieving effective computation and communication overlap. The opaque nature of MPI progress poses significant challenges in advancing MPI within modern high-performance computing (HPC) practices. Firstly, the lack of clarity hinders the development of explicit guidelines for enhancing computation and communication overlap in applications. Secondly, it prevents MPI from seamlessly integrating with contemporary programming paradigms, such as task-based runtimes and event-driven programming. Thirdly, it limits the extension of MPI functionalities from the user space. In this paper, we examine the role of MPI progress by analyzing the implementation details of MPI messaging. We then generalize the asynchronous communication pattern and identify key factors influencing application performance. Based on this analysis, we propose a set of MPI extensions designed to enable users to explicitly construct and manage an efficient progress engine. We provide example codes to demonstrate the use of these proposed APIs in achieving improved performance, adapting MPI to task-based or event-driven programming styles, and constructing collective algorithms that rival the performance of native implementations. Our approach is compared to previous efforts in the field, highlighting its reduced complexity and increased effectiveness. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13807v2-abstract-full').style.display = 'none'; document.getElementById('2405.13807v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Not submitted</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2402.12274">arXiv:2402.12274</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2402.12274">pdf</a>, <a href="https://arxiv.org/format/2402.12274">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Designing and Prototyping Extensions to MPI in MPICH </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+H">Hui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Raffenetti%2C+K">Ken Raffenetti</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yanfei Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Gillis%2C+T">Thomas Gillis</a>, <a href="/search/cs?searchtype=author&amp;query=Latham%2C+R">Robert Latham</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2402.12274v1-abstract-short" style="display: inline;"> As HPC system architectures and the applications running on them continue to evolve, the MPI standard itself must evolve. The trend in current and future HPC systems toward powerful nodes with multiple CPU cores and multiple GPU accelerators makes efficient support for hybrid programming critical for applications to achieve high performance. However, the support for hybrid programming in the MPI s&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.12274v1-abstract-full').style.display = 'inline'; document.getElementById('2402.12274v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2402.12274v1-abstract-full" style="display: none;"> As HPC system architectures and the applications running on them continue to evolve, the MPI standard itself must evolve. The trend in current and future HPC systems toward powerful nodes with multiple CPU cores and multiple GPU accelerators makes efficient support for hybrid programming critical for applications to achieve high performance. However, the support for hybrid programming in the MPI standard has not kept up with recent trends. The MPICH implementation of MPI provides a platform for implementing and experimenting with new proposals and extensions to fill this gap and to gain valuable experience and feedback before the MPI Forum can consider them for standardization. In this work, we detail six extensions implemented in MPICH to increase MPI interoperability with other runtimes, with a specific focus on heterogeneous architectures. First, the extension to MPI generalized requests lets applications integrate asynchronous tasks into MPI&#39;s progress engine. Second, the iovec extension to datatypes lets applications use MPI datatypes as a general-purpose data layout API beyond just MPI communications. 
Third, a new MPI object, MPIX stream, can be used by applications to identify execution contexts beyond MPI processes, including threads and GPU streams. MPIX stream communicators can be created to make existing MPI functions thread-aware and GPU-aware, thus providing applications with explicit ways to achieve higher performance. Fourth, MPIX Streams are extended to support the enqueue semantics for offloading MPI communications onto a GPU stream context. Fifth, thread communicators allow MPI communicators to be constructed with individual threads, thus providing a new level of interoperability between MPI and on-node runtimes such as OpenMP. Lastly, we present an extension to invoke MPI progress, which lets users spawn progress threads with fine-grained control. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2402.12274v1-abstract-full').style.display = 'none'; document.getElementById('2402.12274v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages. 
Submitted IJHPCA special issue</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.16551">arXiv:2401.16551</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.16551">pdf</a>, <a href="https://arxiv.org/format/2401.16551">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3615318.3615320">10.1145/3615318.3615320 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Frustrated with MPI+Threads? Try MPIxThreads! </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+H">Hui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Raffenetti%2C+K">Ken Raffenetti</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+J">Junchao Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yanfei Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.16551v1-abstract-short" style="display: inline;"> MPI+Threads, embodied by the MPI/OpenMP hybrid programming model, is a parallel programming paradigm where threads are used for on-node shared-memory parallelization and MPI is used for multi-node distributed-memory parallelization. 
OpenMP provides an incremental approach to parallelize code, while MPI, with its isolated address space and explicit messaging API, affords straightforward paths to ob&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.16551v1-abstract-full').style.display = 'inline'; document.getElementById('2401.16551v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.16551v1-abstract-full" style="display: none;"> MPI+Threads, embodied by the MPI/OpenMP hybrid programming model, is a parallel programming paradigm where threads are used for on-node shared-memory parallelization and MPI is used for multi-node distributed-memory parallelization. OpenMP provides an incremental approach to parallelize code, while MPI, with its isolated address space and explicit messaging API, affords straightforward paths to obtain good parallel performance. However, MPI+Threads is not an ideal solution. Since MPI is unaware of the thread context, it cannot be used for interthread communication. This results in duplicated efforts to create separate and sometimes nested solutions for similar parallel tasks. In addition, because the MPI library is required to obey message-ordering semantics, mixing threads and MPI via MPI_THREAD_MULTIPLE can easily result in miserable performance due to accidental serializations. We propose a new MPI extension, MPIX Thread Communicator (threadcomm), that allows threads to be assigned distinct MPI ranks within thread parallel regions. The threadcomm extension combines both MPI processes and OpenMP threads to form a unified parallel environment. We show that this MPIxThreads (MPI Multiply Threads) paradigm allows OpenMP and MPI to work together in a complementary way to achieve both cleaner codes and better performance. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.16551v1-abstract-full').style.display = 'none'; document.getElementById('2401.16551v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.04607">arXiv:2310.04607</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.04607">pdf</a>, <a href="https://arxiv.org/format/2310.04607">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> A Comprehensive Performance Study of Large Language Models on Novel AI Accelerators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Emani%2C+M">Murali Emani</a>, <a href="/search/cs?searchtype=author&amp;query=Foreman%2C+S">Sam Foreman</a>, <a href="/search/cs?searchtype=author&amp;query=Sastry%2C+V">Varuni Sastry</a>, <a href="/search/cs?searchtype=author&amp;query=Xie%2C+Z">Zhen Xie</a>, <a href="/search/cs?searchtype=author&amp;query=Raskar%2C+S">Siddhisanket Raskar</a>, <a href="/search/cs?searchtype=author&amp;query=Arnold%2C+W">William Arnold</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a>, <a 
href="/search/cs?searchtype=author&amp;query=Vishwanath%2C+V">Venkatram Vishwanath</a>, <a href="/search/cs?searchtype=author&amp;query=Papka%2C+M+E">Michael E. Papka</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.04607v1-abstract-short" style="display: inline;"> Artificial intelligence (AI) methods have become critical in scientific applications to help accelerate scientific discovery. Large language models (LLMs) are being considered as a promising approach to address some of the challenging problems because of their superior generalization capabilities across domains. The effectiveness of the models and the accuracy of the applications is contingent upo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04607v1-abstract-full').style.display = 'inline'; document.getElementById('2310.04607v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.04607v1-abstract-full" style="display: none;"> Artificial intelligence (AI) methods have become critical in scientific applications to help accelerate scientific discovery. Large language models (LLMs) are being considered as a promising approach to address some of the challenging problems because of their superior generalization capabilities across domains. The effectiveness of the models and the accuracy of the applications is contingent upon their efficient execution on the underlying hardware infrastructure. Specialized AI accelerator hardware systems have recently become available for accelerating AI applications. However, the comparative performance of these AI accelerators on large language models has not been previously studied. 
In this paper, we systematically study LLMs on multiple AI accelerators and GPUs and evaluate their performance characteristics for these models. We evaluate these systems with (i) a micro-benchmark using a core transformer block, (ii) a GPT-2 model, and (iii) an LLM-driven science use case, GenSLM. We present our findings and analyses of the models&#39; performance to better understand the intrinsic capabilities of AI accelerators. Furthermore, our analysis takes into account key factors such as sequence lengths, scaling behavior, sparsity, and sensitivity to gradient accumulation steps. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.04607v1-abstract-full').style.display = 'none'; document.getElementById('2310.04607v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2309.12616">arXiv:2309.12616</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2309.12616">pdf</a>, <a href="https://arxiv.org/format/2309.12616">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> Unlocking Model Insights: A Dataset for Automated Model Card Generation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Singh%2C+S">Shruti Singh</a>, <a href="/search/cs?searchtype=author&amp;query=Lodwal%2C+H">Hitesh Lodwal</a>, <a href="/search/cs?searchtype=author&amp;query=Malwat%2C+H">Husain Malwat</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rakesh Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Singh%2C+M">Mayank Singh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2309.12616v1-abstract-short" style="display: inline;"> Language models (LMs) are no longer restricted to ML community, and instruction-tuned LMs have led to a rise in autonomous AI agents. As the accessibility of LMs grows, it is imperative that an understanding of their capabilities, intended usage, and development cycle also improves. Model cards are a popular practice for documenting detailed information about an ML model. 
To automate model card ge&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.12616v1-abstract-full').style.display = 'inline'; document.getElementById('2309.12616v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2309.12616v1-abstract-full" style="display: none;"> Language models (LMs) are no longer restricted to ML community, and instruction-tuned LMs have led to a rise in autonomous AI agents. As the accessibility of LMs grows, it is imperative that an understanding of their capabilities, intended usage, and development cycle also improves. Model cards are a popular practice for documenting detailed information about an ML model. To automate model card generation, we introduce a dataset of 500 question-answer pairs for 25 ML models that cover crucial aspects of the model, such as its training configurations, datasets, biases, architecture details, and training resources. We employ annotators to extract the answers from the original paper. Further, we explore the capabilities of LMs in generating model cards by answering questions. Our initial experiments with ChatGPT-3.5, LLaMa, and Galactica showcase a significant gap in the understanding of research papers by these aforementioned LMs as well as generating factual textual responses. We posit that our dataset can be used to train models to automate the generation of model cards from paper text and reduce human effort in the model card curation process. 
The complete dataset is available on https://osf.io/hqt7p/?view_only=3b9114e3904c4443bcd9f5c270158d37 <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2309.12616v1-abstract-full').style.display = 'none'; document.getElementById('2309.12616v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.05199">arXiv:2308.05199</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.05199">pdf</a>, <a href="https://arxiv.org/format/2308.05199">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> gZCCL: Compression-Accelerated Collective Communication Framework for GPU Clusters </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Jiajun Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Di%2C+S">Sheng Di</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+X">Xiaodong Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhai%2C+Y">Yujia Zhai</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jinyang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Huang%2C+Y">Yafan Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Raffenetti%2C+K">Ken Raffenetti</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+H">Hui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+K">Kai Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+X">Xiaoyi Lu</a>, <a 
href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zizhong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Cappello%2C+F">Franck Cappello</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yanfei Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.05199v2-abstract-short" style="display: inline;"> GPU-aware collective communication has become a major bottleneck for modern computing platforms as GPU computing power rapidly rises. A traditional approach is to directly integrate lossy compression into GPU-aware collectives, which can lead to serious performance issues such as underutilized GPU devices and uncontrolled data distortion. In order to address these issues, in this paper, we propose&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.05199v2-abstract-full').style.display = 'inline'; document.getElementById('2308.05199v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.05199v2-abstract-full" style="display: none;"> GPU-aware collective communication has become a major bottleneck for modern computing platforms as GPU computing power rapidly rises. A traditional approach is to directly integrate lossy compression into GPU-aware collectives, which can lead to serious performance issues such as underutilized GPU devices and uncontrolled data distortion. In order to address these issues, in this paper, we propose gZCCL, a first-ever general framework that designs and optimizes GPU-aware, compression-enabled collectives with an accuracy-aware design to control error propagation. To validate our framework, we evaluate the performance on up to 512 NVIDIA A100 GPUs with real-world applications and datasets. 
Experimental results demonstrate that our gZCCL-accelerated collectives, including both collective computation (Allreduce) and collective data movement (Scatter), can outperform NCCL as well as Cray MPI by up to 4.5X and 28.7X, respectively. Furthermore, our accuracy evaluation with an image-stacking application confirms the high reconstructed data quality of our accuracy-aware framework. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.05199v2-abstract-full').style.display = 'none'; document.getElementById('2308.05199v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 9 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 13 figures, and 2 tables. 
ICS &#39;24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.03930">arXiv:2308.03930</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.03930">pdf</a>, <a href="https://arxiv.org/format/2308.03930">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3605573.3605599">10.1145/3605573.3605599 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Quantifying the Performance Benefits of Partitioned Communication in MPI </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Gillis%2C+T">Thomas Gillis</a>, <a href="/search/cs?searchtype=author&amp;query=Raffenetti%2C+K">Ken Raffenetti</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+H">Hui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yanfei Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.03930v2-abstract-short" style="display: inline;"> Partitioned communication was introduced in MPI 4.0 as a user-friendly interface to support pipelined communication patterns, particularly common in the context of MPI+threads. It provides the user with the ability to divide a global buffer into smaller independent chunks, called partitions, which can then be communicated independently. 
In this work we first model the performance gain that can be&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.03930v2-abstract-full').style.display = 'inline'; document.getElementById('2308.03930v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.03930v2-abstract-full" style="display: none;"> Partitioned communication was introduced in MPI 4.0 as a user-friendly interface to support pipelined communication patterns, particularly common in the context of MPI+threads. It provides the user with the ability to divide a global buffer into smaller independent chunks, called partitions, which can then be communicated independently. In this work we first model the performance gain that can be expected when using partitioned communication. Next, we describe the improvements we made to MPICH to enable those gains and provide a high-quality implementation of MPI partitioned communication. We then evaluate partitioned communication in various common use cases and assess the performance in comparison with other MPI point-to-point and one-sided approaches. Specifically, we first investigate two scenarios commonly encountered for small partition sizes in a multithreaded environment: thread contention and overhead of using many partitions. We propose two solutions to alleviate the measured penalty and demonstrate their use. We then focus on large messages and the gain obtained when exploiting the delay resulting from computations or load imbalance. We conclude with our perspectives on the benefits of partitioned communication and the various results obtained. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.03930v2-abstract-full').style.display = 'none'; document.getElementById('2308.03930v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">MSC Class:</span> 68M10; 68M14 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.10612">arXiv:2305.10612</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.10612">pdf</a>, <a href="https://arxiv.org/format/2305.10612">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3588195.3595955">10.1145/3588195.3595955 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Accelerating MPI Collectives with Process-in-Process-based Multi-object Techniques </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Jiajun Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Ouyang%2C+K">Kaiming Ouyang</a>, <a href="/search/cs?searchtype=author&amp;query=Zhai%2C+Y">Yujia Zhai</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jinyang Liu</a>, <a 
href="/search/cs?searchtype=author&amp;query=Si%2C+M">Min Si</a>, <a href="/search/cs?searchtype=author&amp;query=Raffenetti%2C+K">Ken Raffenetti</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+H">Hui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Hori%2C+A">Atsushi Hori</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zizhong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yanfei Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.10612v1-abstract-short" style="display: inline;"> In the exascale computing era, optimizing MPI collective performance in high-performance computing (HPC) applications is critical. Current algorithms face performance degradation due to system call overhead, page faults, or data-copy latency, affecting HPC applications&#39; efficiency and scalability. To address these issues, we propose PiP-MColl, a Process-in-Process-based Multi-object Inter-process&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10612v1-abstract-full').style.display = 'inline'; document.getElementById('2305.10612v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.10612v1-abstract-full" style="display: none;"> In the exascale computing era, optimizing MPI collective performance in high-performance computing (HPC) applications is critical. Current algorithms face performance degradation due to system call overhead, page faults, or data-copy latency, affecting HPC applications&#39; efficiency and scalability. 
To address these issues, we propose PiP-MColl, a Process-in-Process-based Multi-object Inter-process MPI Collective design that maximizes small message MPI collective performance at scale. PiP-MColl features efficient multiple sender and receiver collective algorithms and leverages Process-in-Process shared memory techniques to eliminate unnecessary system call, page fault overhead, and extra data copy, improving intra- and inter-node message rate and throughput. Our design also boosts performance for larger messages, resulting in comprehensive improvement for various message sizes. Experimental results show that PiP-MColl outperforms popular MPI libraries, including OpenMPI, MVAPICH2, and Intel MPI, by up to 4.6X for MPI collectives like MPI_Scatter and MPI_Allgather. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.10612v1-abstract-full').style.display = 'none'; document.getElementById('2305.10612v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted by ACM HPDC 2023</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2304.03890">arXiv:2304.03890</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2304.03890">pdf</a>, <a href="https://arxiv.org/format/2304.03890">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> An Optimized Error-controlled MPI Collective Framework Integrated with Lossy Compression </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Huang%2C+J">Jiajun Huang</a>, <a href="/search/cs?searchtype=author&amp;query=Di%2C+S">Sheng Di</a>, <a href="/search/cs?searchtype=author&amp;query=Yu%2C+X">Xiaodong Yu</a>, <a href="/search/cs?searchtype=author&amp;query=Zhai%2C+Y">Yujia Zhai</a>, <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhaorui Zhang</a>, <a href="/search/cs?searchtype=author&amp;query=Liu%2C+J">Jinyang Liu</a>, <a href="/search/cs?searchtype=author&amp;query=Lu%2C+X">Xiaoyi Lu</a>, <a href="/search/cs?searchtype=author&amp;query=Raffenetti%2C+K">Ken Raffenetti</a>, <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+H">Hui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Zhao%2C+K">Kai Zhao</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+Z">Zizhong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Cappello%2C+F">Franck Cappello</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yanfei Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2304.03890v3-abstract-short" style="display: inline;"> With the ever-increasing computing power of supercomputers and the growing scale of scientific applications, the efficiency of MPI collective communications turns out to be a critical bottleneck in large-scale distributed and parallel processing. The large message size in MPI collectives is particularly concerning because it can significantly degrade the overall parallel performance. To address th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.03890v3-abstract-full').style.display = 'inline'; document.getElementById('2304.03890v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2304.03890v3-abstract-full" style="display: none;"> With the ever-increasing computing power of supercomputers and the growing scale of scientific applications, the efficiency of MPI collective communications turns out to be a critical bottleneck in large-scale distributed and parallel processing. The large message size in MPI collectives is particularly concerning because it can significantly degrade the overall parallel performance. To address this issue, prior research simply applies the off-the-shelf fix-rate lossy compressors in the MPI collectives, leading to suboptimal performance, limited generalizability, and unbounded errors. In this paper, we propose a novel solution, called C-Coll, which leverages error-bounded lossy compression to significantly reduce the message size, resulting in a substantial reduction in communication cost. The key contributions are three-fold. (1) We develop two general, optimized lossy-compression-based frameworks for both types of MPI collectives (collective data movement as well as collective computation), based on their particular characteristics. 
Our framework not only reduces communication cost but also preserves data accuracy. (2) We customize SZx, an ultra-fast error-bounded lossy compressor, to meet the specific needs of collective communication. (3) We integrate C-Coll into multiple collectives, such as MPI_Allreduce, MPI_Scatter, and MPI_Bcast, and perform a comprehensive evaluation based on real-world scientific datasets. Experiments show that our solution outperforms the original MPI collectives as well as multiple baselines and related efforts by 1.8-2.7X. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2304.03890v3-abstract-full').style.display = 'none'; document.getElementById('2304.03890v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 7 April, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">13 pages, 18 figures, 6 tables, IPDPS &#39;24</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2303.08803">arXiv:2303.08803</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2303.08803">pdf</a>, <a href="https://arxiv.org/format/2303.08803">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/IPDPSW59300.2023.00018">10.1109/IPDPSW59300.2023.00018 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Cloud Services Enable Efficient AI-Guided Simulation Workflows across Heterogeneous Resources </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ward%2C+L">Logan Ward</a>, <a href="/search/cs?searchtype=author&amp;query=Pauloski%2C+J+G">J. 
Gregory Pauloski</a>, <a href="/search/cs?searchtype=author&amp;query=Hayot-Sasson%2C+V">Valerie Hayot-Sasson</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+R">Ryan Chard</a>, <a href="/search/cs?searchtype=author&amp;query=Babuji%2C+Y">Yadu Babuji</a>, <a href="/search/cs?searchtype=author&amp;query=Sivaraman%2C+G">Ganesh Sivaraman</a>, <a href="/search/cs?searchtype=author&amp;query=Choudhury%2C+S">Sutanay Choudhury</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+K">Kyle Chard</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Foster%2C+I">Ian Foster</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2303.08803v1-abstract-short" style="display: inline;"> Applications that fuse machine learning and simulation can benefit from the use of multiple computing resources, with, for example, simulation codes running on highly parallel supercomputers and AI training and inference tasks on specialized accelerators. Here, we present our experiences deploying two AI-guided simulation workflows across such heterogeneous systems. A unique aspect of our approach&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08803v1-abstract-full').style.display = 'inline'; document.getElementById('2303.08803v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2303.08803v1-abstract-full" style="display: none;"> Applications that fuse machine learning and simulation can benefit from the use of multiple computing resources, with, for example, simulation codes running on highly parallel supercomputers and AI training and inference tasks on specialized accelerators. 
Here, we present our experiences deploying two AI-guided simulation workflows across such heterogeneous systems. A unique aspect of our approach is our use of cloud-hosted management services to manage challenging aspects of cross-resource authentication and authorization, function-as-a-service (FaaS) function invocation, and data transfer. We show that these methods can achieve performance parity with systems that rely on direct connection between resources. We achieve parity by integrating the FaaS system and data transfer capabilities with a system that passes data by reference among managers and workers, and a user-configurable steering algorithm to hide data transfer latencies. We anticipate that this ease of use can enable routine use of heterogeneous resources in computational science. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2303.08803v1-abstract-full').style.display = 'none'; document.getElementById('2303.08803v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 March, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.13707">arXiv:2208.13707</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2208.13707">pdf</a>, <a href="https://arxiv.org/format/2208.13707">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3555819.3555820">10.1145/3555819.3555820 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> MPIX Stream: An Explicit Solution to Hybrid MPI+X Programming </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zhou%2C+H">Hui Zhou</a>, <a href="/search/cs?searchtype=author&amp;query=Raffenetti%2C+K">Ken Raffenetti</a>, <a href="/search/cs?searchtype=author&amp;query=Guo%2C+Y">Yanfei Guo</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.13707v2-abstract-short" style="display: inline;"> The hybrid MPI+X programming paradigm, where X refers to threads or GPUs, has gained prominence in the high-performance computing arena. This corresponds to a trend of system architectures growing more heterogeneous. The current MPI standard only specifies the compatibility levels between MPI and threading runtimes. 
No MPI concept or interface exists for applications to pass thread context or GPU&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.13707v2-abstract-full').style.display = 'inline'; document.getElementById('2208.13707v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.13707v2-abstract-full" style="display: none;"> The hybrid MPI+X programming paradigm, where X refers to threads or GPUs, has gained prominence in the high-performance computing arena. This corresponds to a trend of system architectures growing more heterogeneous. The current MPI standard only specifies the compatibility levels between MPI and threading runtimes. No MPI concept or interface exists for applications to pass thread context or GPU stream context to MPI implementations explicitly. This lack has made performance optimization complicated in some cases and impossible in other cases. We propose a new concept in MPI, called MPIX stream, to represent the general serial execution context that exists in X runtimes. MPIX streams can be directly mapped to threads or GPU execution streams. Passing thread context into MPI allows implementations to precisely map the execution contexts to network endpoints. Passing GPU execution context into MPI allows implementations to directly operate on GPU streams, lowering the CPU/GPU synchronization cost. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.13707v2-abstract-full').style.display = 'none'; document.getElementById('2208.13707v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, EuroMPI/USA&#39;22</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2111.02325">arXiv:2111.02325</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2111.02325">pdf</a>, <a href="https://arxiv.org/format/2111.02325">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> Extending Memory Capacity in Consumer Devices with Emerging Non-Volatile Memory: An Experimental Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Oliveira%2C+G+F">Geraldo F. 
Oliveira</a>, <a href="/search/cs?searchtype=author&amp;query=Ghose%2C+S">Saugata Ghose</a>, <a href="/search/cs?searchtype=author&amp;query=G%C3%B3mez-Luna%2C+J">Juan Gómez-Luna</a>, <a href="/search/cs?searchtype=author&amp;query=Boroumand%2C+A">Amirali Boroumand</a>, <a href="/search/cs?searchtype=author&amp;query=Savery%2C+A">Alexis Savery</a>, <a href="/search/cs?searchtype=author&amp;query=Rao%2C+S">Sonny Rao</a>, <a href="/search/cs?searchtype=author&amp;query=Qazi%2C+S">Salman Qazi</a>, <a href="/search/cs?searchtype=author&amp;query=Grignou%2C+G">Gwendal Grignou</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rahul Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Shiu%2C+E">Eric Shiu</a>, <a href="/search/cs?searchtype=author&amp;query=Mutlu%2C+O">Onur Mutlu</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2111.02325v3-abstract-short" style="display: inline;"> The number and diversity of consumer devices are growing rapidly, alongside their target applications&#39; memory consumption. Unfortunately, DRAM scalability is becoming a limiting factor to the available memory capacity in consumer devices. As a potential solution, manufacturers have introduced emerging non-volatile memories (NVMs) into the market, which can be used to increase the memory capacity o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.02325v3-abstract-full').style.display = 'inline'; document.getElementById('2111.02325v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2111.02325v3-abstract-full" style="display: none;"> The number and diversity of consumer devices are growing rapidly, alongside their target applications&#39; memory consumption. 
Unfortunately, DRAM scalability is becoming a limiting factor to the available memory capacity in consumer devices. As a potential solution, manufacturers have introduced emerging non-volatile memories (NVMs) into the market, which can be used to increase the memory capacity of consumer devices by augmenting or replacing DRAM. Since entirely replacing DRAM with NVM in consumer devices imposes large system integration and design challenges, recent works propose extending the total main memory space available to applications by using NVM as swap space for DRAM. However, no prior work analyzes the implications of enabling a real NVM-based swap space in real consumer devices. In this work, we provide the first analysis of the impact of extending the main memory space of consumer devices using off-the-shelf NVMs. We extensively examine system performance and energy consumption when the NVM device is used as swap space for DRAM main memory to effectively extend the main memory capacity. For our analyses, we equip real web-based Chromebook computers with the Intel Optane SSD, which is a state-of-the-art low-latency NVM-based SSD device. We compare the performance and energy consumption of interactive workloads running on our Chromebook with NVM-based swap space, where the Intel Optane SSD capacity is used as swap space to extend main memory capacity, against two state-of-the-art systems: (i) a baseline system with double the amount of DRAM than the system with the NVM-based swap space; and (ii) a system where the Intel Optane SSD is naively replaced with a state-of-the-art (yet slower) off-the-shelf NAND-flash-based SSD, which we use as a swap space of equivalent size as the NVM-based swap space. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2111.02325v3-abstract-full').style.display = 'none'; document.getElementById('2111.02325v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">This paper has been accepted by IEEE Access</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.02827">arXiv:2110.02827</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.02827">pdf</a>, <a href="https://arxiv.org/format/2110.02827">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Materials Science">cond-mat.mtrl-sci</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/MLHPC54614.2021.00007">10.1109/MLHPC54614.2021.00007 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Colmena: Scalable Machine-Learning-Based Steering of Ensemble Simulations for High Performance Computing </p> <p class="authors"> <span 
class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ward%2C+L">Logan Ward</a>, <a href="/search/cs?searchtype=author&amp;query=Sivaraman%2C+G">Ganesh Sivaraman</a>, <a href="/search/cs?searchtype=author&amp;query=Pauloski%2C+J+G">J. Gregory Pauloski</a>, <a href="/search/cs?searchtype=author&amp;query=Babuji%2C+Y">Yadu Babuji</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+R">Ryan Chard</a>, <a href="/search/cs?searchtype=author&amp;query=Dandu%2C+N">Naveen Dandu</a>, <a href="/search/cs?searchtype=author&amp;query=Redfern%2C+P+C">Paul C. Redfern</a>, <a href="/search/cs?searchtype=author&amp;query=Assary%2C+R+S">Rajeev S. Assary</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+K">Kyle Chard</a>, <a href="/search/cs?searchtype=author&amp;query=Curtiss%2C+L+A">Larry A. Curtiss</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Foster%2C+I">Ian Foster</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.02827v1-abstract-short" style="display: inline;"> Scientific applications that involve simulation ensembles can be accelerated greatly by using experiment design methods to select the best simulations to perform. Methods that use machine learning (ML) to create proxy models of simulations show particular promise for guiding ensembles but are challenging to deploy because of the need to coordinate dynamic mixes of simulation and learning tasks. 
We&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.02827v1-abstract-full').style.display = 'inline'; document.getElementById('2110.02827v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.02827v1-abstract-full" style="display: none;"> Scientific applications that involve simulation ensembles can be accelerated greatly by using experiment design methods to select the best simulations to perform. Methods that use machine learning (ML) to create proxy models of simulations show particular promise for guiding ensembles but are challenging to deploy because of the need to coordinate dynamic mixes of simulation and learning tasks. We present Colmena, an open-source Python framework that allows users to steer campaigns by providing just the implementations of individual tasks plus the logic used to choose which tasks to execute when. Colmena handles task dispatch, results collation, ML model invocation, and ML model (re)training, using Parsl to execute tasks on HPC systems. We describe the design of Colmena and illustrate its capabilities by applying it to electrolyte design, where it both scales to 65536 CPUs and accelerates the discovery rate for high-performance molecules by a factor of 100 over unguided searches. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.02827v1-abstract-full').style.display = 'none'; document.getElementById('2110.02827v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">camera-ready version for ML in HPC Environments 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2102.13008">arXiv:2102.13008</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2102.13008">pdf</a>, <a href="https://arxiv.org/format/2102.13008">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> Imitation Learning with Human Eye Gaze via Multi-Objective Prediction </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R+K">Ravi Kumar Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Sunbeam%2C+M+S">MD-Nazmus Samin Sunbeam</a>, <a href="/search/cs?searchtype=author&amp;query=Goecks%2C+V+G">Vinicius G. Goecks</a>, <a href="/search/cs?searchtype=author&amp;query=Novoseller%2C+E">Ellen Novoseller</a>, <a href="/search/cs?searchtype=author&amp;query=Bera%2C+R">Ritwik Bera</a>, <a href="/search/cs?searchtype=author&amp;query=Lawhern%2C+V+J">Vernon J. Lawhern</a>, <a href="/search/cs?searchtype=author&amp;query=Gremillion%2C+G+M">Gregory M. Gremillion</a>, <a href="/search/cs?searchtype=author&amp;query=Valasek%2C+J">John Valasek</a>, <a href="/search/cs?searchtype=author&amp;query=Waytowich%2C+N+R">Nicholas R. 
Waytowich</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2102.13008v3-abstract-short" style="display: inline;"> Approaches for teaching learning agents via human demonstrations have been widely studied and successfully applied to multiple domains. However, the majority of imitation learning work utilizes only behavioral information from the demonstrator, i.e. which actions were taken, and ignores other useful information. In particular, eye gaze information can give valuable insight towards where the demons&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.13008v3-abstract-full').style.display = 'inline'; document.getElementById('2102.13008v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2102.13008v3-abstract-full" style="display: none;"> Approaches for teaching learning agents via human demonstrations have been widely studied and successfully applied to multiple domains. However, the majority of imitation learning work utilizes only behavioral information from the demonstrator, i.e. which actions were taken, and ignores other useful information. In particular, eye gaze information can give valuable insight towards where the demonstrator is allocating visual attention, and holds the potential to improve agent performance and generalization. In this work, we propose Gaze Regularized Imitation Learning (GRIL), a novel context-aware, imitation learning architecture that learns concurrently from both human demonstrations and eye gaze to solve tasks where visual attention provides important context. We apply GRIL to a visual navigation task, in which an unmanned quadrotor is trained to search for and navigate to a target vehicle in a photorealistic simulated environment. 
We show that GRIL outperforms several state-of-the-art gaze-based imitation learning algorithms, simultaneously learns to predict human visual attention, and generalizes to scenarios not present in the training data. Supplemental videos and code can be found at https://sites.google.com/view/gaze-regularized-il/. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2102.13008v3-abstract-full').style.display = 'none'; document.getElementById('2102.13008v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Paper accepted and selected as an oral presentation at Interactive Learning with Implicit Human Feedback Workshop at ICML 2023</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> I.2.6; I.2.9; I.2.10 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2007.03152">arXiv:2007.03152</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2007.03152">pdf</a>, <a href="https://arxiv.org/format/2007.03152">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> The gem5 Simulator: Version 20.0+ </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lowe-Power%2C+J">Jason Lowe-Power</a>, <a 
href="/search/cs?searchtype=author&amp;query=Ahmad%2C+A+M">Abdul Mutaal Ahmad</a>, <a href="/search/cs?searchtype=author&amp;query=Akram%2C+A">Ayaz Akram</a>, <a href="/search/cs?searchtype=author&amp;query=Alian%2C+M">Mohammad Alian</a>, <a href="/search/cs?searchtype=author&amp;query=Amslinger%2C+R">Rico Amslinger</a>, <a href="/search/cs?searchtype=author&amp;query=Andreozzi%2C+M">Matteo Andreozzi</a>, <a href="/search/cs?searchtype=author&amp;query=Armejach%2C+A">Adrià Armejach</a>, <a href="/search/cs?searchtype=author&amp;query=Asmussen%2C+N">Nils Asmussen</a>, <a href="/search/cs?searchtype=author&amp;query=Beckmann%2C+B">Brad Beckmann</a>, <a href="/search/cs?searchtype=author&amp;query=Bharadwaj%2C+S">Srikant Bharadwaj</a>, <a href="/search/cs?searchtype=author&amp;query=Black%2C+G">Gabe Black</a>, <a href="/search/cs?searchtype=author&amp;query=Bloom%2C+G">Gedare Bloom</a>, <a href="/search/cs?searchtype=author&amp;query=Bruce%2C+B+R">Bobby R. Bruce</a>, <a href="/search/cs?searchtype=author&amp;query=Carvalho%2C+D+R">Daniel Rodrigues Carvalho</a>, <a href="/search/cs?searchtype=author&amp;query=Castrillon%2C+J">Jeronimo Castrillon</a>, <a href="/search/cs?searchtype=author&amp;query=Chen%2C+L">Lizhong Chen</a>, <a href="/search/cs?searchtype=author&amp;query=Derumigny%2C+N">Nicolas Derumigny</a>, <a href="/search/cs?searchtype=author&amp;query=Diestelhorst%2C+S">Stephan Diestelhorst</a>, <a href="/search/cs?searchtype=author&amp;query=Elsasser%2C+W">Wendy Elsasser</a>, <a href="/search/cs?searchtype=author&amp;query=Escuin%2C+C">Carlos Escuin</a>, <a href="/search/cs?searchtype=author&amp;query=Fariborz%2C+M">Marjan Fariborz</a>, <a href="/search/cs?searchtype=author&amp;query=Farmahini-Farahani%2C+A">Amin Farmahini-Farahani</a>, <a href="/search/cs?searchtype=author&amp;query=Fotouhi%2C+P">Pouya Fotouhi</a>, <a href="/search/cs?searchtype=author&amp;query=Gambord%2C+R">Ryan Gambord</a>, <a 
href="/search/cs?searchtype=author&amp;query=Gandhi%2C+J">Jayneel Gandhi</a> , et al. (53 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2007.03152v2-abstract-short" style="display: inline;"> The open-source and community-supported gem5 simulator is one of the most popular tools for computer architecture research. This simulation infrastructure allows researchers to model modern computer hardware at the cycle level, and it has enough fidelity to boot unmodified Linux-based operating systems and run full applications for multiple architectures including x86, Arm, and RISC-V. The gem5 si&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.03152v2-abstract-full').style.display = 'inline'; document.getElementById('2007.03152v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2007.03152v2-abstract-full" style="display: none;"> The open-source and community-supported gem5 simulator is one of the most popular tools for computer architecture research. This simulation infrastructure allows researchers to model modern computer hardware at the cycle level, and it has enough fidelity to boot unmodified Linux-based operating systems and run full applications for multiple architectures including x86, Arm, and RISC-V. The gem5 simulator has been under active development over the last nine years since the original gem5 release. In this time, there have been over 7500 commits to the codebase from over 250 unique contributors which have improved the simulator by adding new features, fixing bugs, and increasing the code quality. In this paper, we give and overview of gem5&#39;s usage and features, describe the current state of the gem5 simulator, and enumerate the major changes since the initial release of gem5. 
We also discuss how the gem5 simulator has transitioned to a formal governance model to enable continued improvement and community support for the next 20 years of computer architecture research. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2007.03152v2-abstract-full').style.display = 'none'; document.getElementById('2007.03152v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 July, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Source, comments, and feedback: https://github.com/darchr/gem5-20-paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1904.11163">arXiv:1904.11163</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1904.11163">pdf</a>, <a href="https://arxiv.org/format/1904.11163">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> </div> </div> <p class="title is-5 mathjax"> A Conditional Adversarial Network for Scene Flow Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a 
href="/search/cs?searchtype=author&amp;query=Thakur%2C+R+K">Ravi Kumar Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Mukherjee%2C+S">Snehasis Mukherjee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1904.11163v1-abstract-short" style="display: inline;"> The problem of Scene flow estimation in depth videos has been attracting attention of researchers of robot vision, due to its potential application in various areas of robotics. The conventional scene flow methods are difficult to use in reallife applications due to their long computational overhead. We propose a conditional adversarial network SceneFlowGAN for scene flow estimation. The proposed&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.11163v1-abstract-full').style.display = 'inline'; document.getElementById('1904.11163v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1904.11163v1-abstract-full" style="display: none;"> The problem of Scene flow estimation in depth videos has been attracting attention of researchers of robot vision, due to its potential application in various areas of robotics. The conventional scene flow methods are difficult to use in reallife applications due to their long computational overhead. We propose a conditional adversarial network SceneFlowGAN for scene flow estimation. The proposed SceneFlowGAN uses loss function at two ends: both generator and descriptor ends. The proposed network is the first attempt to estimate scene flow using generative adversarial networks, and is able to estimate both the optical flow and disparity from the input stereo images simultaneously. The proposed method is experimented on a large RGB-D benchmark sceneflow dataset. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.11163v1-abstract-full').style.display = 'none'; document.getElementById('1904.11163v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1807.03464">arXiv:1807.03464</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1807.03464">pdf</a>, <a href="https://arxiv.org/format/1807.03464">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Geometry">cs.CG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/ICARCV.2018.8581172">10.1109/ICARCV.2018.8581172 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> SceneEDNet: A Deep Learning Approach for Scene Flow Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R+K">Ravi Kumar Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Mukherjee%2C+S">Snehasis Mukherjee</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1807.03464v1-abstract-short" style="display: inline;"> Estimating scene flow in RGB-D videos is attracting much interest of the computer vision researchers, due to its potential applications in robotics. The state-of-the-art techniques for scene flow estimation, typically rely on the knowledge of scene structure of the frame and the correspondence between frames. However, with the increasing amount of RGB-D data captured from sophisticated sensors lik&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1807.03464v1-abstract-full').style.display = 'inline'; document.getElementById('1807.03464v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1807.03464v1-abstract-full" style="display: none;"> Estimating scene flow in RGB-D videos is attracting much interest of the computer vision researchers, due to its potential applications in robotics. The state-of-the-art techniques for scene flow estimation, typically rely on the knowledge of scene structure of the frame and the correspondence between frames. However, with the increasing amount of RGB-D data captured from sophisticated sensors like Microsoft Kinect, and the recent advances in the area of sophisticated deep learning techniques, introduction of an efficient deep learning technique for scene flow estimation, is becoming important. This paper introduces a first effort to apply a deep learning method for direct estimation of scene flow by presenting a fully convolutional neural network with an encoder-decoder (ED) architecture. The proposed network SceneEDNet involves estimation of three dimensional motion vectors of all the scene points from sequence of stereo images. 
The training for direct estimation of scene flow is done using consecutive pairs of stereo images and corresponding scene flow ground truth. The proposed architecture is applied on a huge dataset and provides meaningful results. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1807.03464v1-abstract-full').style.display = 'none'; document.getElementById('1807.03464v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> ICARCV (2018) 394-399 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1312.1629">arXiv:1312.1629</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1312.1629">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> Detection and prevention of botnets and malware in an enterprise network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+M+R">Manoj Rameshchandra Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Khilnani%2C+D+R">Divye Raj Khilnani</a>, <a href="/search/cs?searchtype=author&amp;query=Gupta%2C+K">Kushagra Gupta</a>, <a href="/search/cs?searchtype=author&amp;query=Jain%2C+S">Sandeep Jain</a>, <a href="/search/cs?searchtype=author&amp;query=Agarwal%2C+V">Vineet Agarwal</a>, <a href="/search/cs?searchtype=author&amp;query=Sane%2C+S">Suneeta Sane</a>, <a href="/search/cs?searchtype=author&amp;query=Sanyal%2C+S">Sugata Sanyal</a>, <a 
href="/search/cs?searchtype=author&amp;query=Dhekne%2C+P+S">Prabhakar S Dhekne</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1312.1629v1-abstract-short" style="display: inline;"> One of the most significant threats faced by enterprise networks today is from Bots. A Bot is a program that operates as an agent for a user and runs automated tasks over the internet, at a much higher rate than would be possible for a human alone. A collection of Bots in a network, used for malicious purposes is referred to as a Botnet. Bot attacks can range from localized attacks like key-loggin&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1312.1629v1-abstract-full').style.display = 'inline'; document.getElementById('1312.1629v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1312.1629v1-abstract-full" style="display: none;"> One of the most significant threats faced by enterprise networks today is from Bots. A Bot is a program that operates as an agent for a user and runs automated tasks over the internet, at a much higher rate than would be possible for a human alone. A collection of Bots in a network, used for malicious purposes is referred to as a Botnet. Bot attacks can range from localized attacks like key-logging to network intensive attacks like Distributed Denial of Service (DDoS). In this paper, we suggest a novel approach that can detect and combat Bots. The proposed solution adopts a two pronged strategy which we have classified into the standalone algorithm and the network algorithm. The standalone algorithm runs independently on each node of the network. It monitors the active processes on the node and tries to identify Bot processes using parameters such as response time and output to input traffic ratio. 
If a suspicious process has been identified the network algorithm is triggered. The network algorithm will then analyze conversations to and from the hosts of the network using the transport layer flow records. It then tries to deduce the Bot pattern as well as Bot signatures which can subsequently be used by the standalone algorithm to thwart Bot processes at their very onset. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1312.1629v1-abstract-full').style.display = 'none'; document.getElementById('1312.1629v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 December, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2013. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 7 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1309.1780">arXiv:1309.1780</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1309.1780">pdf</a>, <a href="https://arxiv.org/ps/1309.1780">ps</a>, <a href="https://arxiv.org/format/1309.1780">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mathematical Software">cs.MS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" 
href="https://doi.org/10.5334/jors.aw">10.5334/jors.aw <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Software Abstractions and Methodologies for HPC Simulation Codes on Future Architectures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dubey%2C+A">A. Dubey</a>, <a href="/search/cs?searchtype=author&amp;query=Brandt%2C+S">S. Brandt</a>, <a href="/search/cs?searchtype=author&amp;query=Brower%2C+R">R. Brower</a>, <a href="/search/cs?searchtype=author&amp;query=Giles%2C+M">M. Giles</a>, <a href="/search/cs?searchtype=author&amp;query=Hovland%2C+P">P. Hovland</a>, <a href="/search/cs?searchtype=author&amp;query=Lamb%2C+D+Q">D. Q. Lamb</a>, <a href="/search/cs?searchtype=author&amp;query=Loffler%2C+F">F. Loffler</a>, <a href="/search/cs?searchtype=author&amp;query=Norris%2C+B">B. Norris</a>, <a href="/search/cs?searchtype=author&amp;query=OShea%2C+B">B. OShea</a>, <a href="/search/cs?searchtype=author&amp;query=Rebbi%2C+C">C. Rebbi</a>, <a href="/search/cs?searchtype=author&amp;query=Snir%2C+M">M. Snir</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">R. Thakur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1309.1780v1-abstract-short" style="display: inline;"> Large, complex, multi-scale, multi-physics simulation codes, running on high performance com-puting (HPC) platforms, have become essential to advancing science and engineering. These codes simulate multi-scale, multi-physics phenomena with unprecedented fidelity on petascale platforms, and are used by large communities. 
Continued ability of these codes to run on future platforms is as crucial to t&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1309.1780v1-abstract-full').style.display = 'inline'; document.getElementById('1309.1780v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1309.1780v1-abstract-full" style="display: none;"> Large, complex, multi-scale, multi-physics simulation codes, running on high performance computing (HPC) platforms, have become essential to advancing science and engineering. These codes simulate multi-scale, multi-physics phenomena with unprecedented fidelity on petascale platforms, and are used by large communities. Continued ability of these codes to run on future platforms is as crucial to their communities as continued improvements in instruments and facilities are to experimental scientists. However, the ability of code developers to do these things faces a serious challenge with the paradigm shift underway in platform architecture. The complexity and uncertainty of the future platforms makes it essential to approach this challenge cooperatively as a community. We need to develop common abstractions, frameworks, programming models and software development methodologies that can be applied across a broad range of complex simulation codes, and common software infrastructure to support them. In this position paper we express and discuss our belief that such an infrastructure is critical to the deployment of existing and new large, multi-scale, multi-physics codes on future HPC platforms.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1309.1780v1-abstract-full').style.display = 'none'; document.getElementById('1309.1780v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 September, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2013. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Position Paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1212.1914">arXiv:1212.1914</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1212.1914">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> A Heuristic Reputation Based System to Detect Spam activities in a Social Networking Platform, HRSSSNP </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+M+R">Manoj Rameshchandra Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Sanyal%2C+S">Sugata Sanyal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1212.1914v1-abstract-short" style="display: inline;"> The introduction of the social networking platform has drastically affected the way individuals interact. Even though most of the effects have been positive, there exist some serious threats associated with the interactions on a social networking website. 
A considerable proportion of the crimes that occur are initiated through a social networking platform [5]. Almost 33% of the crimes on the inter&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1212.1914v1-abstract-full').style.display = 'inline'; document.getElementById('1212.1914v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1212.1914v1-abstract-full" style="display: none;"> The introduction of the social networking platform has drastically affected the way individuals interact. Even though most of the effects have been positive, there exist some serious threats associated with the interactions on a social networking website. A considerable proportion of the crimes that occur are initiated through a social networking platform [5]. Almost 33% of the crimes on the internet are initiated through a social networking website [5]. Moreover activities like spam messages create unnecessary traffic and might affect the user base of a social networking platform. As a result preventing interactions with malicious intent and spam activities becomes crucial. This work attempts to detect the same in a social networking platform by considering a social network as a weighted graph wherein each node, which represents an individual in the social network, stores activities of other nodes with respect to itself in an optimized format which is referred to as localized data-set. The weights associated with the edges in the graph represent the trust relationship between profiles. The weights of the edges along with the localized data-set is used to infer whether nodes in the social network are compromised and are performing spam or malicious activities. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1212.1914v1-abstract-full').style.display = 'none'; document.getElementById('1212.1914v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2012. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">5 Pages, 1 Figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1207.0122">arXiv:1207.0122</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1207.0122">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> A Distributed and Cooperative Approach to Botnet Detection Using Gossip Protocol </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+M+R">Manoj Rameshchandra Thakur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1207.0122v1-abstract-short" style="display: inline;"> Bots, in recent times, have posed a major threat to enterprise networks. With the distributed nature of the way in which botnets operate, the problems faced by enterprises have become acute. A bot is a program that operates as an agent for a user and runs automated tasks over the internet, at a much higher rate than would be possible for a human alone. 
A collection of bots in a network, used for m&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1207.0122v1-abstract-full').style.display = 'inline'; document.getElementById('1207.0122v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1207.0122v1-abstract-full" style="display: none;"> Bots, in recent times, have posed a major threat to enterprise networks. With the distributed nature of the way in which botnets operate, the problems faced by enterprises have become acute. A bot is a program that operates as an agent for a user and runs automated tasks over the internet, at a much higher rate than would be possible for a human alone. A collection of bots in a network, used for malicious purposes, is referred to as a botnet. In this paper we suggested a distributed, co-operative approach towards detecting botnets in a given network which is inspired by the gossip protocol. Each node in a given network runs a standalone agent that computes a suspicion value for that node after regular intervals. Each node in the network exchanges its suspicion values with every other node in the network at regular intervals. The use of gossip protocol ensures that if a node in the network is compromised, all other nodes in the network are informed about it as soon as possible. Each node also ensures that at any instance, by means of the gossip protocol, it maintains the latest suspicion values of all the other nodes in the network.
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1207.0122v1-abstract-full').style.display = 'none'; document.getElementById('1207.0122v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2012. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1206.2307">arXiv:1206.2307</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1206.2307">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> A PAXOS based State Machine Replication System for Anomaly Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+M+R">Manoj Rameshchandra Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Sanyal%2C+S">Sugata Sanyal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1206.2307v1-abstract-short" style="display: inline;"> A number of systems in recent times suffer from attacks like DDoS and Ping of Death. Such attacks result in loss of critical system resources and CPU cycles, as these compromised systems behave in an abnormal manner. The effect of such abnormalities is worse in case of compromised systems handling financial transaction, since it leads to severe monetary losses. 
In this paper we propose a system th&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1206.2307v1-abstract-full').style.display = 'inline'; document.getElementById('1206.2307v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1206.2307v1-abstract-full" style="display: none;"> A number of systems in recent times suffer from attacks like DDoS and Ping of Death. Such attacks result in loss of critical system resources and CPU cycles, as these compromised systems behave in an abnormal manner. The effect of such abnormalities is worse in case of compromised systems handling financial transaction, since it leads to severe monetary losses. In this paper we propose a system that uses the Replicated State Machine approach to detect abnormality in system usage. The suggested system is based on PAXOS algorithm, an algorithm for solving the consensus problem in a network of unreliable processors. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1206.2307v1-abstract-full').style.display = 'none'; document.getElementById('1206.2307v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2012. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">9 pages, 4 figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1205.4457">arXiv:1205.4457</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1205.4457">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> A Hybrid Approach Towards Intrusion Detection Based on Artificial Immune System and Soft Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sanyal%2C+S">Sugata Sanyal</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+M+R">Manoj Rameshchandra Thakur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1205.4457v1-abstract-short" style="display: inline;"> A number of works in the field of intrusion detection have been based on Artificial Immune System and Soft Computing. Artificial Immune System based approaches attempt to leverage the adaptability, error tolerance, self-monitoring and distributed nature of Human Immune Systems.
Whereas Soft Computing based approaches are instrumental in developing fuzzy rule based systems for detecting intrusions&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1205.4457v1-abstract-full').style.display = 'inline'; document.getElementById('1205.4457v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1205.4457v1-abstract-full" style="display: none;"> A number of works in the field of intrusion detection have been based on Artificial Immune System and Soft Computing. Artificial Immune System based approaches attempt to leverage the adaptability, error tolerance, self-monitoring and distributed nature of Human Immune Systems. Whereas Soft Computing based approaches are instrumental in developing fuzzy rule based systems for detecting intrusions. They are computationally intensive and apply machine learning (both supervised and unsupervised) techniques to detect intrusions in a given system. A combination of these two approaches could provide significant advantages for intrusion detection. In this paper we attempt to leverage the adaptability of Artificial Immune System and the computation intensive nature of Soft Computing to develop a system that can effectively detect intrusions in a given network. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1205.4457v1-abstract-full').style.display = 'none'; document.getElementById('1205.4457v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2012.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">11 Pages, 2 Figures</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1205.2340">arXiv:1205.2340</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1205.2340">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Cryptography and Security">cs.CR</span> </div> </div> <p class="title is-5 mathjax"> A Multi-Dimensional approach towards Intrusion Detection System </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+M+R">Manoj Rameshchandra Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Sanyal%2C+S">Sugata Sanyal</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1205.2340v1-abstract-short" style="display: inline;"> In this paper, we suggest a multi-dimensional approach towards intrusion detection. Network and system usage parameters like source and destination IP addresses; source and destination ports; incoming and outgoing network traffic data rate and number of CPU cycles per request are divided into multiple dimensions. Rather than analyzing raw bytes of data corresponding to the values of the network pa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1205.2340v1-abstract-full').style.display = 'inline'; document.getElementById('1205.2340v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1205.2340v1-abstract-full" style="display: none;"> In this paper, we suggest a multi-dimensional approach towards intrusion detection. 
Network and system usage parameters like source and destination IP addresses; source and destination ports; incoming and outgoing network traffic data rate and number of CPU cycles per request are divided into multiple dimensions. Rather than analyzing raw bytes of data corresponding to the values of the network parameters, a mature function is inferred during the training phase for each dimension. This mature function takes a dimension value as an input and returns a value that represents the level of abnormality in the system usage with respect to that dimension. This mature function is referred to as Individual Anomaly Indicator. Individual Anomaly Indicators recorded for each of the dimensions are then used to generate a Global Anomaly Indicator, a function with n variables (n is the number of dimensions) that provides the Global Anomaly Factor, an indicator of anomaly in the system usage based on all the dimensions considered together. The Global Anomaly Indicator inferred during the training phase is then used to detect anomaly in the network traffic during the detection phase. Network traffic data encountered during the detection phase is fed back to the system to improve the maturity of the Individual Anomaly Indicators and hence the Global Anomaly Indicator. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1205.2340v1-abstract-full').style.display = 'none'; document.getElementById('1205.2340v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 May, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2012. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">8 pages, 3 Figures, 4 Tables</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/cs/0310029">arXiv:cs/0310029</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/cs/0310029">pdf</a>, <a href="https://arxiv.org/ps/cs/0310029">ps</a>, <a href="https://arxiv.org/format/cs/0310029">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Optimizing Noncontiguous Accesses in MPI-IO </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Gropp%2C+W">William Gropp</a>, <a href="/search/cs?searchtype=author&amp;query=Lusk%2C+E">Ewing Lusk</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="cs/0310029v1-abstract-short" style="display: inline;"> The I/O access patterns of many parallel applications consist of accesses to a large number of small, noncontiguous pieces of data. If an application&#39;s I/O needs are met by making many small, distinct I/O requests, however, the I/O performance degrades drastically. To avoid this problem, MPI-IO allows users to access noncontiguous data with a single I/O function call, unlike in Unix I/O. 
In this&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('cs/0310029v1-abstract-full').style.display = 'inline'; document.getElementById('cs/0310029v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="cs/0310029v1-abstract-full" style="display: none;"> The I/O access patterns of many parallel applications consist of accesses to a large number of small, noncontiguous pieces of data. If an application&#39;s I/O needs are met by making many small, distinct I/O requests, however, the I/O performance degrades drastically. To avoid this problem, MPI-IO allows users to access noncontiguous data with a single I/O function call, unlike in Unix I/O. In this paper, we explain how critical this feature of MPI-IO is for high performance and how it enables implementations to perform optimizations. We first provide a classification of the different ways of expressing an application&#39;s I/O needs in MPI-IO--we classify them into four levels, called level&nbsp;0 through level&nbsp;3. We demonstrate that, for applications with noncontiguous access patterns, the I/O performance improves dramatically if users write their applications to make level-3 requests (noncontiguous, collective) rather than level-0 requests (Unix style). We then describe how our MPI-IO implementation, ROMIO, delivers high performance for noncontiguous requests. We explain in detail the two key optimizations ROMIO performs: data sieving for noncontiguous requests from one process and collective I/O for noncontiguous requests from multiple processes. We describe how we have implemented these optimizations portably on multiple machines and file systems, controlled their memory requirements, and also achieved high performance.
We demonstrate the performance and portability with performance results for three applications--an astrophysics-application template (DIST3D), the NAS BTIO benchmark, and an unstructured code (UNSTRUC)--on five different parallel machines: HP Exemplar, IBM SP, Intel Paragon, NEC SX-4, and SGI Origin2000. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('cs/0310029v1-abstract-full').style.display = 'none'; document.getElementById('cs/0310029v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 October, 2003; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2003. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">18 pages, 12 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> ANL/MCS-P913-1001 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> B.4.3; D.1.3 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Parallel Computing 28(1) (January 2002), pp. 
83-105 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/cs/0306048">arXiv:cs/0306048</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/cs/0306048">pdf</a>, <a href="https://arxiv.org/ps/cs/0306048">ps</a>, <a href="https://arxiv.org/format/cs/0306048">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Parallel netCDF: A Scientific High-Performance I/O Interface </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Li%2C+J">Jianwei Li</a>, <a href="/search/cs?searchtype=author&amp;query=Liao%2C+W">Wei-keng Liao</a>, <a href="/search/cs?searchtype=author&amp;query=Choudhary%2C+A">Alok Choudhary</a>, <a href="/search/cs?searchtype=author&amp;query=Ross%2C+R">Robert Ross</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Gropp%2C+W">William Gropp</a>, <a href="/search/cs?searchtype=author&amp;query=Latham%2C+R">Rob Latham</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="cs/0306048v1-abstract-short" style="display: inline;"> Dataset storage, exchange, and access play a critical role in scientific applications. For such purposes netCDF serves as a portable and efficient file format and programming interface, which is popular in numerous scientific application domains. However, the original interface does not provide an efficient mechanism for parallel data storage and access. 
In this work, we present a new parallel i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('cs/0306048v1-abstract-full').style.display = 'inline'; document.getElementById('cs/0306048v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="cs/0306048v1-abstract-full" style="display: none;"> Dataset storage, exchange, and access play a critical role in scientific applications. For such purposes netCDF serves as a portable and efficient file format and programming interface, which is popular in numerous scientific application domains. However, the original interface does not provide an efficient mechanism for parallel data storage and access. In this work, we present a new parallel interface for writing and reading netCDF datasets. This interface is derived with minimum changes from the serial netCDF interface but defines semantics for parallel access and is tailored for high performance. The underlying parallel I/O is achieved through MPI-IO, allowing for dramatic performance gains through the use of collective I/O optimizations. We compare the implementation strategies with HDF5 and analyze both. Our tests indicate programming convenience and significant I/O performance improvement with this parallel netCDF interface. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('cs/0306048v1-abstract-full').style.display = 'none'; document.getElementById('cs/0306048v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 June, 2003; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2003. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages,7 figures</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> Preprint ANL/MCS-P1048-0503 <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> D.1.3 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/cs/0102016">arXiv:cs/0102016</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/cs/0102016">pdf</a>, <a href="https://arxiv.org/ps/cs/0102016">ps</a>, <a href="https://arxiv.org/format/cs/0102016">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> A Scientific Data Management System for Irregular Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=No%2C+J">Jaechun No</a>, <a href="/search/cs?searchtype=author&amp;query=Thakur%2C+R">Rajeev Thakur</a>, <a href="/search/cs?searchtype=author&amp;query=Kaushik%2C+D">Dinesh Kaushik</a>, <a href="/search/cs?searchtype=author&amp;query=Freitag%2C+L">Lori Freitag</a>, <a href="/search/cs?searchtype=author&amp;query=Choudhary%2C+A">Alok Choudhary</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="cs/0102016v1-abstract-short" style="display: inline;"> Many scientific applications are I/O intensive and generate or access large data sets, spanning hundreds or thousands of &#34;files.&#34; Management, storage, efficient access, and analysis of this data present an extremely challenging task. 
We have developed a software system, called Scientific Data Manager (SDM), that uses a combination of parallel file I/O and database support for high-performance sc&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('cs/0102016v1-abstract-full').style.display = 'inline'; document.getElementById('cs/0102016v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="cs/0102016v1-abstract-full" style="display: none;"> Many scientific applications are I/O intensive and generate or access large data sets, spanning hundreds or thousands of &#34;files.&#34; Management, storage, efficient access, and analysis of this data present an extremely challenging task. We have developed a software system, called Scientific Data Manager (SDM), that uses a combination of parallel file I/O and database support for high-performance scientific data management. SDM provides a high-level API to the user and internally, uses a parallel file system to store real data and a database to store application-related metadata. In this paper, we describe how we designed and implemented SDM to support irregular applications. SDM can efficiently handle the reading and writing of data in an irregular mesh as well as the distribution of index values. We describe the SDM user interface and how we implemented it to achieve high performance. SDM makes extensive use of MPI-IO&#39;s noncontiguous collective I/O functions. SDM also uses the concept of a history file to optimize the cost of the index distribution using the metadata stored in the database. We present performance results with two irregular applications, a CFD code called FUN3D and a Rayleigh-Taylor instability code, on the SGI Origin2000 at Argonne National Laboratory. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('cs/0102016v1-abstract-full').style.display = 'none'; document.getElementById('cs/0102016v1-abstract-short').style.display = 'inline';">&#9651; Less</a>
</span>
</p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 February, 2001; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2001. </p>
<p class="comments is-size-7">
  <span class="has-text-black-bis has-text-weight-semibold">Comments:</span>
  <span class="has-text-grey-dark mathjax">7 pages + title page</span>
</p>
<p class="comments is-size-7">
  <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> ANL/MCS-P866-1000
  <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> B.4; B.4.3
</p>
</li>
</ol>
<div class="is-hidden-tablet">
  <!-- feedback for mobile only -->
  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span>
</div>
</div>
</main>
<footer>
  <!-- Secondary site navigation, laid out as two "MetaColumn" halves of link lists -->
  <div class="columns is-desktop" role="navigation" aria-label="Secondary">
    <!-- MetaColumn 1 -->
    <div class="column">
      <div class="columns">
        <div class="column">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/about">About</a></li>
            <li><a href="https://info.arxiv.org/help">Help</a></li>
          </ul>
        </div>
        <div class="column">
          <ul class="nav-spaced">
            <li>
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>
              <a href="https://info.arxiv.org/help/contact.html"> Contact</a>
            </li>
            <li>
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg>
              <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a>
            </li>
          </ul>
        </div>
      </div>
    </div>
    <!-- end MetaColumn 1 -->
    <!-- MetaColumn 2 -->
    <div class="column">
      <div class="columns">
        <div class="column">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li>
            <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li>
          </ul>
        </div>
        <div class="column sorry-app-links">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li>
            <li>
              <p class="help">
                <!-- Links below open a new tab; rel="noopener noreferrer" keeps the opened page from
                     getting a window.opener handle or a Referer header from this search page. -->
                <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank" rel="noopener noreferrer">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br>
                Get status notifications via
                <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a>
                or
                <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a>
              </p>
            </li>
          </ul>
        </div>
      </div>
    </div>
    <!-- end MetaColumn 2 -->
  </div>
</footer>
<script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script>
</body>
</html>

Pages: 1 2 3 4 5 6 7 8 9 10