CINXE.COM

Search | arXiv e-print repository

<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1&ndash;30 of 30 results for author: <span class="mathjax">Merzky, A</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Merzky%2C+A">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Merzky, A"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Merzky%2C+A&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Merzky, A"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.13343">arXiv:2503.13343</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.13343">pdf</a>, <a href="https://arxiv.org/format/2503.13343">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> Scalable Runtime Architecture for Data-driven, Hybrid HPC and ML Workflow Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Titov%2C+M">Mikhail Titov</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Kilic%2C+O">Ozgur Kilic</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+T">Tianle Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.13343v1-abstract-short" style="display: inline;"> Hybrid workflows combining traditional HPC and novel ML methodologies are transforming scientific computing. This paper presents the architecture and implementation of a scalable runtime system that extends RADICAL-Pilot with service-based execution to support AI-out-HPC workflows. Our runtime system enables distributed ML capabilities, efficient resource management, and seamless HPC/ML coupling a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.13343v1-abstract-full').style.display = 'inline'; document.getElementById('2503.13343v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.13343v1-abstract-full" style="display: none;"> Hybrid workflows combining traditional HPC and novel ML methodologies are transforming scientific computing. This paper presents the architecture and implementation of a scalable runtime system that extends RADICAL-Pilot with service-based execution to support AI-out-HPC workflows. Our runtime system enables distributed ML capabilities, efficient resource management, and seamless HPC/ML coupling across local and remote platforms. Preliminary experimental results show that our approach manages concurrent execution of ML models across local and remote HPC/cloud resources with minimal architectural overheads. This lays the foundation for prototyping three representative data-driven workflow applications and executing them at scale on leadership-class HPC platforms. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.13343v1-abstract-full').style.display = 'none'; document.getElementById('2503.13343v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 17 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2411.10637">arXiv:2411.10637</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2411.10637">pdf</a>, <a href="https://arxiv.org/format/2411.10637">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Exascale Workflow Applications and Middleware: An ExaWorks Retrospective </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Alsaadi%2C+A">Aymen Alsaadi</a>, <a href="/search/cs?searchtype=author&amp;query=Hategan-Marandiuc%2C+M">Mihael Hategan-Marandiuc</a>, <a href="/search/cs?searchtype=author&amp;query=Maheshwari%2C+K">Ketan Maheshwari</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Titov%2C+M">Mikhail Titov</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Wilke%2C+A">Andreas Wilke</a>, <a href="/search/cs?searchtype=author&amp;query=Wozniak%2C+J+M">Justin M. 
Wozniak</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+K">Kyle Chard</a>, <a href="/search/cs?searchtype=author&amp;query=da+Silva%2C+R+F">Rafael Ferreira da Silva</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Laney%2C+D">Daniel Laney</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2411.10637v1-abstract-short" style="display: inline;"> Exascale computers offer transformative capabilities to combine data-driven and learning-based approaches with traditional simulation applications to accelerate scientific discovery and insight. However, these software combinations and integrations are difficult to achieve due to the challenges of coordinating and deploying heterogeneous software components on diverse and massive platforms. We pre&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10637v1-abstract-full').style.display = 'inline'; document.getElementById('2411.10637v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2411.10637v1-abstract-full" style="display: none;"> Exascale computers offer transformative capabilities to combine data-driven and learning-based approaches with traditional simulation applications to accelerate scientific discovery and insight. However, these software combinations and integrations are difficult to achieve due to the challenges of coordinating and deploying heterogeneous software components on diverse and massive platforms. We present the ExaWorks project, which addresses many of these challenges. 
We developed a workflow Software Development Toolkit (SDK), a curated collection of workflow technologies that can be composed and interoperated through a common interface, engineered following current best practices, and specifically designed to work on HPC platforms. ExaWorks also developed PSI/J, a job management abstraction API, to simplify the construction of portable software components and applications that can be used over various HPC schedulers. The PSI/J API is a minimal interface for submitting and monitoring jobs and their execution state across multiple and commonly used HPC schedulers. We also describe several leading and innovative workflow examples of ExaWorks tools used on DOE leadership platforms. Furthermore, we discuss how our project is working with the workflow community, large computing facilities, and HPC platform vendors to address the requirements of workflows sustainably at the exascale. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2411.10637v1-abstract-full').style.display = 'none'; document.getElementById('2411.10637v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 November, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.16646">arXiv:2407.16646</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.16646">pdf</a>, <a href="https://arxiv.org/format/2407.16646">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> ExaWorks Software Development Kit: A Robust and Scalable Collection of Interoperable Workflow Technologies </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Hategan-Marandiuc%2C+M">Mihael Hategan-Marandiuc</a>, <a href="/search/cs?searchtype=author&amp;query=Titov%2C+M">Mikhail Titov</a>, <a href="/search/cs?searchtype=author&amp;query=Maheshwari%2C+K">Ketan Maheshwari</a>, <a href="/search/cs?searchtype=author&amp;query=Alsaadi%2C+A">Aymen Alsaadi</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Arambula%2C+R">Ramon Arambula</a>, <a href="/search/cs?searchtype=author&amp;query=Zakharchanka%2C+M">Mikhail Zakharchanka</a>, <a href="/search/cs?searchtype=author&amp;query=Cowan%2C+M">Matt Cowan</a>, <a href="/search/cs?searchtype=author&amp;query=Wozniak%2C+J+M">Justin M. 
Wozniak</a>, <a href="/search/cs?searchtype=author&amp;query=Wilke%2C+A">Andreas Wilke</a>, <a href="/search/cs?searchtype=author&amp;query=Kilic%2C+O+O">Ozgur Ozan Kilic</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+K">Kyle Chard</a>, <a href="/search/cs?searchtype=author&amp;query=da+Silva%2C+R+F">Rafael Ferreira da Silva</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Laney%2C+D">Daniel Laney</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.16646v1-abstract-short" style="display: inline;"> Scientific discovery increasingly requires executing heterogeneous scientific workflows on high-performance computing (HPC) platforms. Heterogeneous workflows contain different types of tasks (e.g., simulation, analysis, and learning) that need to be mapped, scheduled, and launched on different computing. That requires a software stack that enables users to code their workflows and automate resour&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16646v1-abstract-full').style.display = 'inline'; document.getElementById('2407.16646v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.16646v1-abstract-full" style="display: none;"> Scientific discovery increasingly requires executing heterogeneous scientific workflows on high-performance computing (HPC) platforms. Heterogeneous workflows contain different types of tasks (e.g., simulation, analysis, and learning) that need to be mapped, scheduled, and launched on different computing. That requires a software stack that enables users to code their workflows and automate resource management and workflow execution. 
Currently, there are many workflow technologies with diverse levels of robustness and capabilities, and users face difficult choices of software that can effectively and efficiently support their use cases on HPC machines, especially when considering the latest exascale platforms. We contributed to addressing this issue by developing the ExaWorks Software Development Kit (SDK). The SDK is a curated collection of workflow technologies engineered following current best practices and specifically designed to work on HPC platforms. We present our experience with (1) curating those technologies, (2) integrating them to provide users with new capabilities, (3) developing a continuous integration platform to test the SDK on DOE HPC platforms, (4) designing a dashboard to publish the results of those tests, and (5) devising an innovative documentation platform to help users to use those technologies. Our experience details the requirements and the best practices needed to curate workflow technologies, and it also serves as a blueprint for the capabilities and services that DOE will have to offer to support a variety of scientific heterogeneous workflows on the newly available exascale HPC platforms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.16646v1-abstract-full').style.display = 'none'; document.getElementById('2407.16646v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.18073">arXiv:2403.18073</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.18073">pdf</a>, <a href="https://arxiv.org/format/2403.18073">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Workflow Mini-Apps: Portable, Scalable, Tunable &amp; Faithful Representations of Scientific Workflows </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Kilic%2C+O+O">Ozgur Ozan Kilic</a>, <a href="/search/cs?searchtype=author&amp;query=Wang%2C+T">Tianle Wang</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Titov%2C+M">Mikhail Titov</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Pouchard%2C+L">Line Pouchard</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.18073v1-abstract-short" style="display: inline;"> Workflows are critical for scientific discovery. However, the sophistication, heterogeneity, and scale of workflows make building, testing, and optimizing them increasingly challenging. Furthermore, their complexity and heterogeneity make performance reproducibility hard. 
In this paper, we propose workflow mini-apps as a tool to address the challenges in building and testing workflows while contro&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18073v1-abstract-full').style.display = 'inline'; document.getElementById('2403.18073v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.18073v1-abstract-full" style="display: none;"> Workflows are critical for scientific discovery. However, the sophistication, heterogeneity, and scale of workflows make building, testing, and optimizing them increasingly challenging. Furthermore, their complexity and heterogeneity make performance reproducibility hard. In this paper, we propose workflow mini-apps as a tool to address the challenges in building and testing workflows while controlling the fidelity of representing realworld workflows. Workflow mini-apps are deployed and run on various HPC systems and architectures without workflow-specific constraints. We offer insight into their design and implementation, providing an analysis of their performance and reproducibility. Workflow mini-apps thus advance the science of workflows by providing simple, portable, and managed (fidelity) representations of otherwise complex and difficult-to-control real workflows. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.18073v1-abstract-full').style.display = 'none'; document.getElementById('2403.18073v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 26 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2403.15721">arXiv:2403.15721</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2403.15721">pdf</a>, <a href="https://arxiv.org/format/2403.15721">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Design and Implementation of an Analysis Pipeline for Heterogeneous Data </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Sarker%2C+A+K">Arup Kumar Sarker</a>, <a href="/search/cs?searchtype=author&amp;query=Alsaadi%2C+A">Aymen Alsaadi</a>, <a href="/search/cs?searchtype=author&amp;query=Perera%2C+N">Niranda Perera</a>, <a href="/search/cs?searchtype=author&amp;query=Staylor%2C+M">Mills Staylor</a>, <a href="/search/cs?searchtype=author&amp;query=von+Laszewski%2C+G">Gregor von Laszewski</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Kilic%2C+O+O">Ozgur Ozan Kilic</a>, <a href="/search/cs?searchtype=author&amp;query=Titov%2C+M">Mikhail Titov</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Fox%2C+G">Geoffrey Fox</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2403.15721v3-abstract-short" style="display: inline;"> Managing and preparing complex data for deep learning, a prevalent approach in large-scale data science can be challenging. 
Data transfer for model training also presents difficulties, impacting scientific fields like genomics, climate modeling, and astronomy. A large-scale solution like Google Pathways with a distributed execution environment for deep learning models exists but is proprietary. In&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15721v3-abstract-full').style.display = 'inline'; document.getElementById('2403.15721v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2403.15721v3-abstract-full" style="display: none;"> Managing and preparing complex data for deep learning, a prevalent approach in large-scale data science can be challenging. Data transfer for model training also presents difficulties, impacting scientific fields like genomics, climate modeling, and astronomy. A large-scale solution like Google Pathways with a distributed execution environment for deep learning models exists but is proprietary. Integrating existing open-source, scalable runtime tools and data frameworks on high-performance computing (HPC) platforms is crucial to address these challenges. Our objective is to establish a smooth and unified method of combining data engineering and deep learning frameworks with diverse execution capabilities that can be deployed on various high-performance computing platforms, including cloud and supercomputers. We aim to support heterogeneous systems with accelerators, where Cylon and other data engineering and deep learning frameworks can utilize heterogeneous execution. To achieve this, we propose Radical-Cylon, a heterogeneous runtime system with a parallel and distributed data framework to execute Cylon as a task of Radical Pilot. We thoroughly explain Radical-Cylon&#39;s design and development and the execution process of Cylon tasks using Radical Pilot. 
This approach enables the use of heterogeneous MPI-communicators across multiple nodes. Radical-Cylon achieves better performance than Bare-Metal Cylon with minimal and constant overhead. Radical-Cylon achieves (4~15)% faster execution time than batch execution while performing similar join and sort operations with 35 million and 3.5 billion rows with the same resources. The approach aims to excel in both scientific and engineering research HPC systems while demonstrating robust performance on cloud infrastructures. This dual capability fosters collaboration and innovation within the open-source scientific research community. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2403.15721v3-abstract-full').style.display = 'none'; document.getElementById('2403.15721v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 April, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 23 March, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2024. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">14 pages, 16 figures, 2 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> H.2.4; D.2.7; D.2.2 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.07895">arXiv:2307.07895</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.07895">pdf</a>, <a href="https://arxiv.org/format/2307.07895">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/e-Science58273.2023.10254912">10.1109/e-Science58273.2023.10254912 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> PSI/J: A Portable Interface for Submitting, Monitoring, and Managing Jobs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hategan-Marandiuc%2C+M">Mihael Hategan-Marandiuc</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Collier%2C+N">Nicholson Collier</a>, <a href="/search/cs?searchtype=author&amp;query=Maheshwari%2C+K">Ketan Maheshwari</a>, <a href="/search/cs?searchtype=author&amp;query=Ozik%2C+J">Jonathan Ozik</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Wilke%2C+A">Andreas Wilke</a>, <a 
href="/search/cs?searchtype=author&amp;query=Wozniak%2C+J+M">Justin M. Wozniak</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+K">Kyle Chard</a>, <a href="/search/cs?searchtype=author&amp;query=Foster%2C+I">Ian Foster</a>, <a href="/search/cs?searchtype=author&amp;query=da+Silva%2C+R+F">Rafael Ferreira da Silva</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Laney%2C+D">Daniel Laney</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2307.07895v2-abstract-short" style="display: inline;"> It is generally desirable for high-performance computing (HPC) applications to be portable between HPC systems, for example to make use of more performant hardware, make effective use of allocations, and to co-locate compute jobs with large datasets. Unfortunately, moving scientific applications between HPC systems is challenging for various reasons, most notably that HPC systems have different HP&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.07895v2-abstract-full').style.display = 'inline'; document.getElementById('2307.07895v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.07895v2-abstract-full" style="display: none;"> It is generally desirable for high-performance computing (HPC) applications to be portable between HPC systems, for example to make use of more performant hardware, make effective use of allocations, and to co-locate compute jobs with large datasets. Unfortunately, moving scientific applications between HPC systems is challenging for various reasons, most notably that HPC systems have different HPC schedulers. 
We introduce PSI/J, a job management abstraction API intended to simplify the construction of software components and applications that are portable over various HPC scheduler implementations. We argue that such a system is both necessary and that no viable alternative currently exists. We analyze similar notable APIs and attempt to determine the factors that influenced their evolution and adoption by the HPC community. We base the design of PSI/J on that analysis. We describe how PSI/J has been integrated in three workflow systems and one application, and also show via experiments that PSI/J imposes minimal overhead. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.07895v2-abstract-full').style.display = 'none'; document.getElementById('2307.07895v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 September, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2209.00114">arXiv:2209.00114</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2209.00114">pdf</a>, <a href="https://arxiv.org/format/2209.00114">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/CCGrid54584.2022.00069">10.1109/CCGrid54584.2022.00069 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> RAPTOR: Ravenous Throughput Computing </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2209.00114v1-abstract-short" style="display: inline;"> We describe the design, implementation and performance of the RADICAL-Pilot task overlay (RAPTOR). RAPTOR enables the execution of heterogeneous tasks -- i.e., functions and executables with arbitrary duration -- on HPC platforms, providing high throughput and high resource utilization. 
RAPTOR supports the high throughput virtual screening requirements of DOE&#39;s National Virtual Biotechnology Labor&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.00114v1-abstract-full').style.display = 'inline'; document.getElementById('2209.00114v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2209.00114v1-abstract-full" style="display: none;"> We describe the design, implementation and performance of the RADICAL-Pilot task overlay (RAPTOR). RAPTOR enables the execution of heterogeneous tasks -- i.e., functions and executables with arbitrary duration -- on HPC platforms, providing high throughput and high resource utilization. RAPTOR supports the high throughput virtual screening requirements of DOE&#39;s National Virtual Biotechnology Laboratory effort to find therapeutic solutions for COVID-19. RAPTOR has been used on $&gt;8000$ compute nodes to sustain 144M/hour docking hits, and to screen $\sim$10$^{11}$ ligands. To the best of our knowledge, both the throughput rate and aggregated number of executed tasks are a factor of two greater than previously reported in literature. RAPTOR represents important progress towards improvement of computational drug discovery, in terms of size of libraries screened, and for the possibility of generating training data fast enough to serve the last generation of docking surrogate models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2209.00114v1-abstract-full').style.display = 'none'; document.getElementById('2209.00114v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 9 figures. 22nd International Symposium on Cluster, Cloud and Internet Computing (CCGrid 2022)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2201.06962">arXiv:2201.06962</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2201.06962">pdf</a>, <a href="https://arxiv.org/format/2201.06962">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Atmospheric and Oceanic Physics">physics.ao-ph</span> </div> </div> <p class="title is-5 mathjax"> A Scalable Solution for Running Ensemble Simulations for Photovoltaic Energy </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Hu%2C+W">Weiming Hu</a>, <a href="/search/cs?searchtype=author&amp;query=Cervone%2C+G">Guido Cervone</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2201.06962v1-abstract-short" style="display: inline;"> This chapter proposes and provides an in-depth discussion of a scalable solution for running ensemble simulation for solar energy production. 
Generating a forecast ensemble is computationally expensive. But with the help of Analog Ensemble, forecast ensembles can be generated with a single deterministic run of a weather forecast model. Weather ensembles are then used to simulate 11 10 KW photovolt&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.06962v1-abstract-full').style.display = 'inline'; document.getElementById('2201.06962v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2201.06962v1-abstract-full" style="display: none;"> This chapter proposes and provides an in-depth discussion of a scalable solution for running ensemble simulation for solar energy production. Generating a forecast ensemble is computationally expensive. But with the help of Analog Ensemble, forecast ensembles can be generated with a single deterministic run of a weather forecast model. Weather ensembles are then used to simulate 11 10 KW photovoltaic solar power systems to study the simulation uncertainty under a wide range of panel configuration and weather conditions. This computational workflow has been deployed onto the NCAR supercomputer, Cheyenne, with more than 7,000 cores. Results show that, spring and summer are typically associated with a larger simulation uncertainty. Optimizing the panel configuration based on their individual performance under changing weather conditions can improve the simulation accuracy by more than 12%. This work also shows how panel configuration can be optimized based on geographic locations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2201.06962v1-abstract-full').style.display = 'none'; document.getElementById('2201.06962v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 10 January, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2108.13521">arXiv:2108.13521</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2108.13521">pdf</a>, <a href="https://arxiv.org/format/2108.13521">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> ExaWorks: Workflows for Exascale </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Al-Saadi%2C+A">Aymen Al-Saadi</a>, <a href="/search/cs?searchtype=author&amp;query=Ahn%2C+D+H">Dong H. 
Ahn</a>, <a href="/search/cs?searchtype=author&amp;query=Babuji%2C+Y">Yadu Babuji</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+K">Kyle Chard</a>, <a href="/search/cs?searchtype=author&amp;query=Corbett%2C+J">James Corbett</a>, <a href="/search/cs?searchtype=author&amp;query=Hategan%2C+M">Mihael Hategan</a>, <a href="/search/cs?searchtype=author&amp;query=Herbein%2C+S">Stephen Herbein</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Laney%2C+D">Daniel Laney</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Munson%2C+T">Todd Munson</a>, <a href="/search/cs?searchtype=author&amp;query=Salim%2C+M">Michael Salim</a>, <a href="/search/cs?searchtype=author&amp;query=Titov%2C+M">Mikhail Titov</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Wozniak%2C+J+M">Justin M. Wozniak</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2108.13521v1-abstract-short" style="display: inline;"> Exascale computers will offer transformative capabilities to combine data-driven and learning-based approaches with traditional simulation applications to accelerate scientific discovery and insight. 
These software combinations and integrations, however, are difficult to achieve due to challenges of coordination and deployment of heterogeneous software components on diverse and massive platforms.&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.13521v1-abstract-full').style.display = 'inline'; document.getElementById('2108.13521v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2108.13521v1-abstract-full" style="display: none;"> Exascale computers will offer transformative capabilities to combine data-driven and learning-based approaches with traditional simulation applications to accelerate scientific discovery and insight. These software combinations and integrations, however, are difficult to achieve due to challenges of coordination and deployment of heterogeneous software components on diverse and massive platforms. We present the ExaWorks project, which can address many of these challenges: ExaWorks is leading a co-design process to create a workflow software development Toolkit (SDK) consisting of a wide range of workflow management tools that can be composed and interoperate through common interfaces. We describe the initial set of tools and interfaces supported by the SDK, efforts to make them easier to apply to complex science challenges, and examples of their application to exemplar cases. Furthermore, we discuss how our project is working with the workflows community, large computing facilities as well as HPC platform vendors to sustainably address the requirements of workflows at the exascale. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2108.13521v1-abstract-full').style.display = 'none'; document.getElementById('2108.13521v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.07036">arXiv:2106.07036</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.07036">pdf</a>, <a href="https://arxiv.org/format/2106.07036">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Biomolecules">q-bio.BM</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Protein-Ligand Docking Surrogate Models: A SARS-CoV-2 Benchmark for Deep Learning Accelerated Virtual Screening </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/cs?searchtype=author&amp;query=Brettin%2C+T">Thomas Brettin</a>, <a href="/search/cs?searchtype=author&amp;query=Partin%2C+A">Alexander Partin</a>, <a href="/search/cs?searchtype=author&amp;query=Yoo%2C+H">Hyunseung Yoo</a>, <a href="/search/cs?searchtype=author&amp;query=Babuji%2C+Y">Yadu Babuji</a>, <a href="/search/cs?searchtype=author&amp;query=Blaiszik%2C+B">Ben Blaiszik</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>, <a 
href="/search/cs?searchtype=author&amp;query=Ramanathan%2C+A">Arvind Ramanathan</a>, <a href="/search/cs?searchtype=author&amp;query=Stevens%2C+R">Rick Stevens</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.07036v2-abstract-short" style="display: inline;"> We propose a benchmark to study surrogate model accuracy for protein-ligand docking. We share a dataset consisting of 200 million 3D complex structures and 2D structure scores across a consistent set of 13 million &#34;in-stock&#34; molecules over 15 receptors, or binding sites, across the SARS-CoV-2 proteome. Our work shows surrogate docking models have six orders of magnitude more throughput than standa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.07036v2-abstract-full').style.display = 'inline'; document.getElementById('2106.07036v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.07036v2-abstract-full" style="display: none;"> We propose a benchmark to study surrogate model accuracy for protein-ligand docking. We share a dataset consisting of 200 million 3D complex structures and 2D structure scores across a consistent set of 13 million &#34;in-stock&#34; molecules over 15 receptors, or binding sites, across the SARS-CoV-2 proteome. Our work shows surrogate docking models have six orders of magnitude more throughput than standard docking protocols on the same supercomputer node types. We demonstrate the power of high-speed surrogate models by running each target against 1 billion molecules in under a day (50k predictions per GPU seconds). We showcase a workflow for docking utilizing surrogate ML models as a pre-filter. 
Our workflow is ten times faster at screening a library of compounds than the standard technique, with an error rate less than 0.01\% of detecting the underlying best scoring 0.1\% of compounds. Our analysis of the speedup explains that to screen more molecules under a docking paradigm, another order of magnitude speedup must come from model accuracy rather than computing speed (which, if increased, will not anymore alter our throughput to screen molecules). We believe this is strong evidence for the community to begin focusing on improving the accuracy of surrogate models to improve the ability to screen massive compound libraries 100x or even 1000x faster than current techniques. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.07036v2-abstract-full').style.display = 'none'; document.getElementById('2106.07036v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2106.05177">arXiv:2106.05177</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2106.05177">pdf</a>, <a href="https://arxiv.org/format/2106.05177">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.5281/zenodo.4915801">10.5281/zenodo.4915801 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Workflows Community Summit: Advancing the State-of-the-art of Scientific Workflows Management Systems Research and Development </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=da+Silva%2C+R+F">Rafael Ferreira da Silva</a>, <a href="/search/cs?searchtype=author&amp;query=Casanova%2C+H">Henri Casanova</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+K">Kyle Chard</a>, <a href="/search/cs?searchtype=author&amp;query=Coleman%2C+T">Tainã Coleman</a>, <a href="/search/cs?searchtype=author&amp;query=Laney%2C+D">Dan Laney</a>, <a href="/search/cs?searchtype=author&amp;query=Ahn%2C+D">Dong Ahn</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Howell%2C+D">Dorran Howell</a>, <a href="/search/cs?searchtype=author&amp;query=Soiland-Reys%2C+S">Stian Soiland-Reys</a>, <a href="/search/cs?searchtype=author&amp;query=Altintas%2C+I">Ilkay Altintas</a>, <a href="/search/cs?searchtype=author&amp;query=Thain%2C+D">Douglas Thain</a>, <a 
href="/search/cs?searchtype=author&amp;query=Filgueira%2C+R">Rosa Filgueira</a>, <a href="/search/cs?searchtype=author&amp;query=Babuji%2C+Y">Yadu Babuji</a>, <a href="/search/cs?searchtype=author&amp;query=Badia%2C+R+M">Rosa M. Badia</a>, <a href="/search/cs?searchtype=author&amp;query=Balis%2C+B">Bartosz Balis</a>, <a href="/search/cs?searchtype=author&amp;query=Caino-Lores%2C+S">Silvina Caino-Lores</a>, <a href="/search/cs?searchtype=author&amp;query=Callaghan%2C+S">Scott Callaghan</a>, <a href="/search/cs?searchtype=author&amp;query=Coppens%2C+F">Frederik Coppens</a>, <a href="/search/cs?searchtype=author&amp;query=Crusoe%2C+M+R">Michael R. Crusoe</a>, <a href="/search/cs?searchtype=author&amp;query=De%2C+K">Kaushik De</a>, <a href="/search/cs?searchtype=author&amp;query=Di+Natale%2C+F">Frank Di Natale</a>, <a href="/search/cs?searchtype=author&amp;query=Do%2C+T+M+A">Tu M. A. Do</a>, <a href="/search/cs?searchtype=author&amp;query=Enders%2C+B">Bjoern Enders</a>, <a href="/search/cs?searchtype=author&amp;query=Fahringer%2C+T">Thomas Fahringer</a>, <a href="/search/cs?searchtype=author&amp;query=Fouilloux%2C+A">Anne Fouilloux</a> , et al. (33 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2106.05177v1-abstract-short" style="display: inline;"> Scientific workflows are a cornerstone of modern scientific computing, and they have underpinned some of the most significant discoveries of the last decade. Many of these workflows have high computational, storage, and/or communication demands, and thus must execute on a wide range of large-scale platforms, from large clouds to upcoming exascale HPC platforms. 
Workflows will play a crucial role i&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.05177v1-abstract-full').style.display = 'inline'; document.getElementById('2106.05177v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2106.05177v1-abstract-full" style="display: none;"> Scientific workflows are a cornerstone of modern scientific computing, and they have underpinned some of the most significant discoveries of the last decade. Many of these workflows have high computational, storage, and/or communication demands, and thus must execute on a wide range of large-scale platforms, from large clouds to upcoming exascale HPC platforms. Workflows will play a crucial role in the data-oriented and post-Moore&#39;s computing landscape as they democratize the application of cutting-edge research techniques, computationally intensive methods, and use of new computing platforms. As workflows continue to be adopted by scientific projects and user communities, they are becoming more complex. Workflows are increasingly composed of tasks that perform computations such as short machine learning inference, multi-node simulations, long-running machine learning model training, amongst others, and thus increasingly rely on heterogeneous architectures that include CPUs but also GPUs and accelerators. The workflow management system (WMS) technology landscape is currently segmented and presents significant barriers to entry due to the hundreds of seemingly comparable, yet incompatible, systems that exist. Another fundamental problem is that there are conflicting theoretical bases and abstractions for a WMS. Systems that use the same underlying abstractions can likely be translated between, which is not the case for systems that use different abstractions. 
More information: https://workflowsri.org/summits/technical <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2106.05177v1-abstract-full').style.display = 'none'; document.getElementById('2106.05177v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 June, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2105.13185">arXiv:2105.13185</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2105.13185">pdf</a>, <a href="https://arxiv.org/format/2105.13185">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> RADICAL-Pilot and Parsl: Executing Heterogeneous Workflows on HPC Platforms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Alsaadi%2C+A">Aymen Alsaadi</a>, <a href="/search/cs?searchtype=author&amp;query=Ward%2C+L">Logan Ward</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+K">Kyle Chard</a>, <a href="/search/cs?searchtype=author&amp;query=Foster%2C+I">Ian Foster</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2105.13185v2-abstract-short" style="display: inline;"> Workflows applications are becoming increasingly 
important to support scientific discovery. That is leading to a proliferation of workflow management systems and, thus, to a fragmented software ecosystem. Integration among existing workflow tools can improve development efficiency and, ultimately, increase the sustainability of scientific workflow software. We describe our experience with integrat&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.13185v2-abstract-full').style.display = 'inline'; document.getElementById('2105.13185v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2105.13185v2-abstract-full" style="display: none;"> Workflows applications are becoming increasingly important to support scientific discovery. That is leading to a proliferation of workflow management systems and, thus, to a fragmented software ecosystem. Integration among existing workflow tools can improve development efficiency and, ultimately, increase the sustainability of scientific workflow software. We describe our experience with integrating RADICAL-Pilot (RP) and Parsl as a way to enable users to develop and execute workflow applications with heterogeneous tasks on heterogeneous high-performance computing resources. We describe our approach to the integration of the two systems and detail the development of RPEX, a Parsl executor which uses RP as its workload manager. We develop an RP executor that executes heterogeneous MPI Python functions on CPU cores and GPUs. 
We measure the weak and strong scaling of RPEX, RP, and Parsl when providing new capabilities to two paradigmatic use cases: Colmena and Ice Wedge Polygons <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2105.13185v2-abstract-full').style.display = 'none'; document.getElementById('2105.13185v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 May, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.02843">arXiv:2103.02843</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2103.02843">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Biological Physics">physics.bio-ph</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1098/rsfs.2021.0018">10.1098/rsfs.2021.0018 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Pandemic Drugs at Pandemic Speed: 
Infrastructure for Accelerating COVID-19 Drug Discovery with Hybrid Machine Learning- and Physics-based Simulations on High Performance Computers </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Bhati%2C+A+P">Agastya P. Bhati</a>, <a href="/search/cs?searchtype=author&amp;query=Wan%2C+S">Shunzhou Wan</a>, <a href="/search/cs?searchtype=author&amp;query=Alf%C3%A8%2C+D">Dario Alfè</a>, <a href="/search/cs?searchtype=author&amp;query=Clyde%2C+A+R">Austin R. Clyde</a>, <a href="/search/cs?searchtype=author&amp;query=Bode%2C+M">Mathis Bode</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+L">Li Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Titov%2C+M">Mikhail Titov</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Highfield%2C+R+R">Roger R. 
Highfield</a>, <a href="/search/cs?searchtype=author&amp;query=Rocchia%2C+W">Walter Rocchia</a>, <a href="/search/cs?searchtype=author&amp;query=Scafuri%2C+N">Nicola Scafuri</a>, <a href="/search/cs?searchtype=author&amp;query=Succi%2C+S">Sauro Succi</a>, <a href="/search/cs?searchtype=author&amp;query=Kranzlm%C3%BCller%2C+D">Dieter Kranzlmüller</a>, <a href="/search/cs?searchtype=author&amp;query=Mathias%2C+G">Gerald Mathias</a>, <a href="/search/cs?searchtype=author&amp;query=Wifling%2C+D">David Wifling</a>, <a href="/search/cs?searchtype=author&amp;query=Donon%2C+Y">Yann Donon</a>, <a href="/search/cs?searchtype=author&amp;query=Di+Meglio%2C+A">Alberto Di Meglio</a>, <a href="/search/cs?searchtype=author&amp;query=Vallecorsa%2C+S">Sofia Vallecorsa</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+H">Heng Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Trifan%2C+A">Anda Trifan</a>, <a href="/search/cs?searchtype=author&amp;query=Ramanathan%2C+A">Arvind Ramanathan</a>, <a href="/search/cs?searchtype=author&amp;query=Brettin%2C+T">Tom Brettin</a>, <a href="/search/cs?searchtype=author&amp;query=Partin%2C+A">Alexander Partin</a> , et al. (4 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.02843v2-abstract-short" style="display: inline;"> The race to meet the challenges of the global pandemic has served as a reminder that the existing drug discovery process is expensive, inefficient and slow. There is a major bottleneck screening the vast number of potential small molecules to shortlist lead compounds for antiviral drug development. 
New opportunities to accelerate drug discovery lie at the interface between machine learning methods&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.02843v2-abstract-full').style.display = 'inline'; document.getElementById('2103.02843v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.02843v2-abstract-full" style="display: none;"> The race to meet the challenges of the global pandemic has served as a reminder that the existing drug discovery process is expensive, inefficient and slow. There is a major bottleneck screening the vast number of potential small molecules to shortlist lead compounds for antiviral drug development. New opportunities to accelerate drug discovery lie at the interface between machine learning methods, in this case developed for linear accelerators, and physics-based methods. The two in silico methods, each have their own advantages and limitations which, interestingly, complement each other. Here, we present an innovative infrastructural development that combines both approaches to accelerate drug discovery. The scale of the potential resulting workflow is such that it is dependent on supercomputing to achieve extremely high throughput. We have demonstrated the viability of this workflow for the study of inhibitors for four COVID-19 target proteins and our ability to perform the required large-scale calculations to identify lead antiviral compounds through repurposing on a variety of supercomputers. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.02843v2-abstract-full').style.display = 'none'; document.getElementById('2103.02843v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 4 September, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Interface Focus. 2021. 11 (6): 20210018 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.00091">arXiv:2103.00091</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2103.00091">pdf</a>, <a href="https://arxiv.org/format/2103.00091">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Design and Performance Characterization of RADICAL-Pilot on Leadership-class Platforms </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Titov%2C+M">Mikhail Titov</a>, <a href="/search/cs?searchtype=author&amp;query=Al-Saadi%2C+A">Aymen Al-Saadi</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.00091v2-abstract-short" style="display: 
inline;"> Many extreme scale scientific applications have workloads comprised of a large number of individual high-performance tasks. The Pilot abstraction decouples workload specification, resource management, and task execution via job placeholders and late-binding. As such, suitable implementations of the Pilot abstraction can support the collective execution of large number of tasks on supercomputers. W&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.00091v2-abstract-full').style.display = 'inline'; document.getElementById('2103.00091v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.00091v2-abstract-full" style="display: none;"> Many extreme scale scientific applications have workloads comprised of a large number of individual high-performance tasks. The Pilot abstraction decouples workload specification, resource management, and task execution via job placeholders and late-binding. As such, suitable implementations of the Pilot abstraction can support the collective execution of large number of tasks on supercomputers. We introduce RADICAL-Pilot (RP) as a portable, modular and extensible pilot-enabled runtime system. We describe RP&#39;s design, architecture and implementation. We characterize its performance and show its ability to scalably execute workloads comprised of tens of thousands heterogeneous tasks on DOE and NSF leadership-class HPC platforms. Specifically, we investigate RP&#39;s weak/strong scaling with CPU/GPU, single/multi core, (non)MPI tasks and Python functions when using most of ORNL Summit and TACC Frontera. RADICAL-Pilot can be used stand-alone, as well as the runtime for third-party workflow systems. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.00091v2-abstract-full').style.display = 'none'; document.getElementById('2103.00091v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 26 February, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: text overlap with arXiv:1801.01843</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.10517">arXiv:2010.10517</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2010.10517">pdf</a>, <a href="https://arxiv.org/format/2010.10517">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> </div> </div> <p class="title is-5 mathjax"> Scalable HPC and AI Infrastructure for COVID-19 Therapeutics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Lee%2C+H">Hyungro Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Tan%2C+L">Li Tan</a>, <a href="/search/cs?searchtype=author&amp;query=Titov%2C+M">Mikhail Titov</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Alfe%2C+D">Dario 
Alfe</a>, <a href="/search/cs?searchtype=author&amp;query=Bhati%2C+A">Agastya Bhati</a>, <a href="/search/cs?searchtype=author&amp;query=Brace%2C+A">Alex Brace</a>, <a href="/search/cs?searchtype=author&amp;query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/cs?searchtype=author&amp;query=Coveney%2C+P">Peter Coveney</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+H">Heng Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Ramanathan%2C+A">Arvind Ramanathan</a>, <a href="/search/cs?searchtype=author&amp;query=Stevens%2C+R">Rick Stevens</a>, <a href="/search/cs?searchtype=author&amp;query=Trifan%2C+A">Anda Trifan</a>, <a href="/search/cs?searchtype=author&amp;query=Van+Dam%2C+H">Hubertus Van Dam</a>, <a href="/search/cs?searchtype=author&amp;query=Wan%2C+S">Shunzhou Wan</a>, <a href="/search/cs?searchtype=author&amp;query=Wilkinson%2C+S">Sean Wilkinson</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.10517v1-abstract-short" style="display: inline;"> COVID-19 has claimed more 1 million lives and resulted in over 40 million infections. There is an urgent need to identify drugs that can inhibit SARS-CoV-2. 
In response, the DOE recently established the Medical Therapeutics project as part of the National Virtual Biotechnology Laboratory, and tasked it with creating the computational infrastructure and methods necessary to advance therapeutics dev&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.10517v1-abstract-full').style.display = 'inline'; document.getElementById('2010.10517v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.10517v1-abstract-full" style="display: none;"> COVID-19 has claimed more 1 million lives and resulted in over 40 million infections. There is an urgent need to identify drugs that can inhibit SARS-CoV-2. In response, the DOE recently established the Medical Therapeutics project as part of the National Virtual Biotechnology Laboratory, and tasked it with creating the computational infrastructure and methods necessary to advance therapeutics development. We discuss innovations in computational infrastructure and methods that are accelerating and advancing drug design. Specifically, we describe several methods that integrate artificial intelligence and simulation-based approaches, and the design of computational infrastructure to support these methods at scale. We discuss their implementation and characterize their performance, and highlight science advances that these capabilities have enabled. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.10517v1-abstract-full').style.display = 'none'; document.getElementById('2010.10517v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.06574">arXiv:2010.06574</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2010.06574">pdf</a>, <a href="https://arxiv.org/format/2010.06574">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Quantitative Methods">q-bio.QM</span> </div> </div> <p class="title is-5 mathjax"> IMPECCABLE: Integrated Modeling PipelinE for COVID Cure by Assessing Better LEads </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Saadi%2C+A+A">Aymen Al Saadi</a>, <a href="/search/cs?searchtype=author&amp;query=Alfe%2C+D">Dario Alfe</a>, <a href="/search/cs?searchtype=author&amp;query=Babuji%2C+Y">Yadu Babuji</a>, <a href="/search/cs?searchtype=author&amp;query=Bhati%2C+A">Agastya Bhati</a>, <a href="/search/cs?searchtype=author&amp;query=Blaiszik%2C+B">Ben Blaiszik</a>, <a href="/search/cs?searchtype=author&amp;query=Brettin%2C+T">Thomas Brettin</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+K">Kyle Chard</a>, <a href="/search/cs?searchtype=author&amp;query=Chard%2C+R">Ryan Chard</a>, <a href="/search/cs?searchtype=author&amp;query=Coveney%2C+P">Peter Coveney</a>, <a href="/search/cs?searchtype=author&amp;query=Trifan%2C+A">Anda Trifan</a>, <a href="/search/cs?searchtype=author&amp;query=Brace%2C+A">Alex Brace</a>, <a href="/search/cs?searchtype=author&amp;query=Clyde%2C+A">Austin Clyde</a>, <a href="/search/cs?searchtype=author&amp;query=Foster%2C+I">Ian Foster</a>, <a href="/search/cs?searchtype=author&amp;query=Gibbs%2C+T">Tom 
Gibbs</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Keipert%2C+K">Kristopher Keipert</a>, <a href="/search/cs?searchtype=author&amp;query=Kurth%2C+T">Thorsten Kurth</a>, <a href="/search/cs?searchtype=author&amp;query=Kranzlm%C3%BCller%2C+D">Dieter Kranzlmüller</a>, <a href="/search/cs?searchtype=author&amp;query=Lee%2C+H">Hyungro Lee</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+Z">Zhuozhao Li</a>, <a href="/search/cs?searchtype=author&amp;query=Ma%2C+H">Heng Ma</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Mathias%2C+G">Gerald Mathias</a>, <a href="/search/cs?searchtype=author&amp;query=Partin%2C+A">Alexander Partin</a>, <a href="/search/cs?searchtype=author&amp;query=Yin%2C+J">Junqi Yin</a> , et al. (11 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.06574v1-abstract-short" style="display: inline;"> The drug discovery process currently employed in the pharmaceutical industry typically requires about 10 years and $2-3 billion to deliver one new drug. This is both too expensive and too slow, especially in emergencies like the COVID-19 pandemic. 
In silico methodologies need to be improved to better select lead compounds that can proceed to later stages of the drug discovery protocol accelerating&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.06574v1-abstract-full').style.display = 'inline'; document.getElementById('2010.06574v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.06574v1-abstract-full" style="display: none;"> The drug discovery process currently employed in the pharmaceutical industry typically requires about 10 years and $2-3 billion to deliver one new drug. This is both too expensive and too slow, especially in emergencies like the COVID-19 pandemic. In silico methodologies need to be improved to better select lead compounds that can proceed to later stages of the drug discovery protocol accelerating the entire process. No single methodological approach can achieve the necessary accuracy with required efficiency. Here we describe multiple algorithmic innovations to overcome this fundamental limitation, development and deployment of computational infrastructure at scale integrates multiple artificial intelligence and simulation-based approaches. Three measures of performance are:(i) throughput, the number of ligands per unit time; (ii) scientific performance, the number of effective ligands sampled per unit time and (iii) peak performance, in flop/s. The capabilities outlined here have been used in production for several months as the workhorse of the computational infrastructure to support the capabilities of the US-DOE National Virtual Biotechnology Laboratory in combination with resources from the EU Centre of Excellence in Computational Biomedicine. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.06574v1-abstract-full').style.display = 'none'; document.getElementById('2010.06574v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 13 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1909.03057">arXiv:1909.03057</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1909.03057">pdf</a>, <a href="https://arxiv.org/format/1909.03057">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Characterizing the Performance of Executing Many-tasks on Summit </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Naughton%2C+T">Thomas Naughton</a>, <a href="/search/cs?searchtype=author&amp;query=Elwasif%2C+W">Wael Elwasif</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1909.03057v1-abstract-short" style="display: inline;"> Many scientific workloads are comprised of many tasks, where each task is an independent simulation or analysis of data. The execution of millions of tasks on heterogeneous HPC platforms requires scalable dynamic resource management and multi-level scheduling. 
RADICAL-Pilot (RP) -- an implementation of the Pilot abstraction, addresses these challenges and serves as an effective runtime system to e&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.03057v1-abstract-full').style.display = 'inline'; document.getElementById('1909.03057v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1909.03057v1-abstract-full" style="display: none;"> Many scientific workloads are comprised of many tasks, where each task is an independent simulation or analysis of data. The execution of millions of tasks on heterogeneous HPC platforms requires scalable dynamic resource management and multi-level scheduling. RADICAL-Pilot (RP) -- an implementation of the Pilot abstraction, addresses these challenges and serves as an effective runtime system to execute workloads comprised of many tasks. In this paper, we characterize the performance of executing many tasks using RP when interfaced with JSM and PRRTE on Summit: RP is responsible for resource management and task scheduling on acquired resource; JSM or PRRTE enact the placement of launching of scheduled tasks. Our experiments provide lower bounds on the performance of RP when integrated with JSM and PRRTE. Specifically, for workloads comprised of homogeneous single-core, 15 minutes-long tasks we find that: PRRTE scales better than JSM for &gt; O(1000) tasks; PRRTE overheads are negligible; and PRRTE supports optimizations that lower the impact of overheads and enable resource utilization of 63% when executing O(16K), 1-core tasks over 404 compute nodes. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1909.03057v1-abstract-full').style.display = 'none'; document.getElementById('1909.03057v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 September, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2019. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1904.03085">arXiv:1904.03085</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1904.03085">pdf</a>, <a href="https://arxiv.org/format/1904.03085">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> RADICAL-Cybertools: Middleware Building Blocks for Scalable Science </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Balasubramanian%2C+V">Vivek Balasubramanian</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1904.03085v1-abstract-short" style="display: inline;"> RADICAL-Cybertools (RCT) are a set of software systems that serve as middleware to develop efficient and effective tools for scientific computing. Specifically, RCT enable executing many-task applications at extreme scale and on a variety of computing infrastructures. 
RCT are building blocks, designed to work as stand-alone systems, integrated among themselves or integrated with third-party system&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.03085v1-abstract-full').style.display = 'inline'; document.getElementById('1904.03085v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1904.03085v1-abstract-full" style="display: none;"> RADICAL-Cybertools (RCT) are a set of software systems that serve as middleware to develop efficient and effective tools for scientific computing. Specifically, RCT enable executing many-task applications at extreme scale and on a variety of computing infrastructures. RCT are building blocks, designed to work as stand-alone systems, integrated among themselves or integrated with third-party systems. RCT enables innovative science in multiple domains, including but not limited to biophysics, climate science and particle physics, consuming hundreds of millions of core hours. This paper provides an overview of RCT systems, their impact, and the architectural principles and software engineering underlying RCT <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1904.03085v1-abstract-full').style.display = 'none'; document.getElementById('1904.03085v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2019. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1903.10057">arXiv:1903.10057</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1903.10057">pdf</a>, <a href="https://arxiv.org/format/1903.10057">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/MCSE.2019.2920048">10.1109/MCSE.2019.2920048 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Middleware Building Blocks for Workflow Systems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Balasubramanian%2C+V">Vivek Balasubramanian</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Paraskevakos%2C+I">Ioannis Paraskevakos</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1903.10057v2-abstract-short" style="display: inline;"> This paper describes a building blocks approach to the design of scientific workflow systems. We discuss RADICAL-Cybertools as one implementation of the building blocks concept, showing how they are designed and developed in accordance with this approach. 
This paper offers three main contributions: (i) showing the relevance of the design principles underlying the building blocks approach to suppor&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.10057v2-abstract-full').style.display = 'inline'; document.getElementById('1903.10057v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1903.10057v2-abstract-full" style="display: none;"> This paper describes a building blocks approach to the design of scientific workflow systems. We discuss RADICAL-Cybertools as one implementation of the building blocks concept, showing how they are designed and developed in accordance with this approach. This paper offers three main contributions: (i) showing the relevance of the design principles underlying the building blocks approach to support scientific workflows on high performance computing platforms; (ii) illustrating a set of building blocks that enable multiple points of integration, &#34;unifying&#34; conceptual reasoning across otherwise very different tools and systems; and (iii) case studies discussing how RADICAL-Cybertools are integrated with existing workflow, workload, and general purpose computing systems and used to develop domain-specific workflow systems. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1903.10057v2-abstract-full').style.display = 'none'; document.getElementById('1903.10057v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 24 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2019. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1808.00684">arXiv:1808.00684</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1808.00684">pdf</a>, <a href="https://arxiv.org/format/1808.00684">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1016/j.jocs.2018.06.012">10.1016/j.jocs.2018.06.012 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Synapse: Synthetic Application Profiler and Emulator </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Ha%2C+M+T">Ming Tai Ha</a>, <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1808.00684v1-abstract-short" style="display: inline;"> Motivated by the need to emulate workload execution characteristics on high-performance and distributed heterogeneous resources, we introduce Synapse. Synapse is used as a proxy application (or &#34;representative application&#34;) for real workloads, with the advantage that it can be tuned in different ways and dimensions, and also at levels of granularity that are not possible with real applications. 
Sy&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1808.00684v1-abstract-full').style.display = 'inline'; document.getElementById('1808.00684v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1808.00684v1-abstract-full" style="display: none;"> Motivated by the need to emulate workload execution characteristics on high-performance and distributed heterogeneous resources, we introduce Synapse. Synapse is used as a proxy application (or &#34;representative application&#34;) for real workloads, with the advantage that it can be tuned in different ways and dimensions, and also at levels of granularity that are not possible with real applications. Synapse has a platform-independent application profiler, and has the ability to emulate profiled workloads on a variety of resources. Experiments show that the automated profiling performed using Synapse captures an application&#39;s characteristics with high fidelity. The emulation of an application using Synapse can reproduce the application&#39;s execution behavior in the original run-time environment, and can also reproduce those behaviors on different run-time environments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1808.00684v1-abstract-full').style.display = 'none'; document.getElementById('1808.00684v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 August, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Large portions of this work originally appeared as arXiv:1506.00272, which was subsequently published as a workshop paper. 
This is an extended version published in the &#34;Journal of Computational Science&#34;</span>
  </p>
  <p class="comments is-size-7">
    <span class="has-text-black-bis has-text-weight-semibold">Report number:</span> 01
  </p>
  <p class="comments is-size-7">
    <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Journal of Computational Science, 27C (2018) pp. 329-344
  </p>
</li>
<!-- Search result: arXiv:1801.02651 -->
<li class="arxiv-result">
  <div class="is-marginless">
    <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1801.02651">arXiv:1801.02651</a>
      <span>&nbsp;[<a href="https://arxiv.org/pdf/1801.02651">pdf</a>, <a href="https://arxiv.org/format/1801.02651">other</a>]&nbsp;</span>
    </p>
    <div class="tags is-inline-block">
      <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span>
    </div>
  </div>
  <p class="title is-5 mathjax"> Towards General Distributed Resource Selection </p>
  <p class="authors">
    <span class="search-hit">Authors:</span>
    <a href="/search/cs?searchtype=author&amp;query=Ha%2C+M+T">Ming Tai Ha</a>,
    <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>,
    <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>,
    <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>
  </p>
  <p class="abstract mathjax">
    <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
    <!-- Truncated abstract, visible by default. -->
    <span class="abstract-short has-text-grey-dark mathjax" id="1801.02651v1-abstract-short" style="display: inline;">
      The advantages of distributing workloads and utilizing multiple distributed resources are now well established. The type and degree of heterogeneity of distributed resources is increasing, and thus determining how to distribute the workloads becomes increasingly difficult, in particular with respect to the selection of suitable resources.
      We formulate and investigate the resource selection problem&hellip;
      <!-- Toggle: an anchor without href is not focusable, so role, tabindex and a key handler make it keyboard-operable. -->
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1801.02651v1-abstract-full" onclick="document.getElementById('1801.02651v1-abstract-full').style.display = 'inline'; document.getElementById('1801.02651v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
    </span>
    <!-- Full abstract, hidden until the More toggle is activated. -->
    <span class="abstract-full has-text-grey-dark mathjax" id="1801.02651v1-abstract-full" style="display: none;">
      The advantages of distributing workloads and utilizing multiple distributed resources are now well established. The type and degree of heterogeneity of distributed resources is increasing, and thus determining how to distribute the workloads becomes increasingly difficult, in particular with respect to the selection of suitable resources. We formulate and investigate the resource selection problem in a way that it is agnostic of specific task and resource properties, and which is generalizable to range of metrics. Specifically, we developed a model to describe the requirements of tasks and to estimate the cost of running that task on an arbitrary resource using baseline measurements from a reference machine. We integrated our cost model with the Condor matchmaking algorithm to enable resource selection. Experimental validation of our model shows that it provides execution time estimates with 157-171% error on XSEDE resources and 18-31% on OSG resources. We use the task execution cost model to select resources for a bag-of-tasks of up to 1024 GROMACS MD simulations across the target resources. Experiments show that using the model&#39;s estimates reduces the workload&#39;s time-to-completion up to ~85% when compared to the random distribution of workload across the same resources.
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1801.02651v1-abstract-full" onclick="document.getElementById('1801.02651v1-abstract-full').style.display = 'none'; document.getElementById('1801.02651v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
    </span>
  </p>
  <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 January, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2018. </p>
</li>
<!-- Search result: arXiv:1801.01843 -->
<li class="arxiv-result">
  <div class="is-marginless">
    <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1801.01843">arXiv:1801.01843</a>
      <span>&nbsp;[<a href="https://arxiv.org/pdf/1801.01843">pdf</a>, <a href="https://arxiv.org/format/1801.01843">other</a>]&nbsp;</span>
    </p>
    <div class="tags is-inline-block">
      <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span>
    </div>
  </div>
  <p class="title is-5 mathjax"> Design and Performance Characterization of RADICAL-Pilot on Titan </p>
  <p class="authors">
    <span class="search-hit">Authors:</span>
    <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>,
    <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>,
    <a href="/search/cs?searchtype=author&amp;query=Maldonado%2C+M">Manuel Maldonado</a>,
    <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>
  </p>
  <p class="abstract mathjax">
    <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
    <!-- Truncated abstract, visible by default. -->
    <span class="abstract-short has-text-grey-dark mathjax" id="1801.01843v1-abstract-short" style="display: inline;">
      Many extreme scale scientific applications have workloads comprised of a large number of individual high-performance tasks. The Pilot abstraction decouples workload specification, resource management, and task execution via job placeholders and late-binding.
As such, suitable implementations of the Pilot abstraction can support the collective execution of large number of tasks on supercomputers. W&hellip;
      <!-- Toggle: an anchor without href is not focusable, so role, tabindex and a key handler make it keyboard-operable. -->
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1801.01843v1-abstract-full" onclick="document.getElementById('1801.01843v1-abstract-full').style.display = 'inline'; document.getElementById('1801.01843v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
    </span>
    <!-- Full abstract, hidden until the More toggle is activated. -->
    <span class="abstract-full has-text-grey-dark mathjax" id="1801.01843v1-abstract-full" style="display: none;">
      Many extreme scale scientific applications have workloads comprised of a large number of individual high-performance tasks. The Pilot abstraction decouples workload specification, resource management, and task execution via job placeholders and late-binding. As such, suitable implementations of the Pilot abstraction can support the collective execution of large number of tasks on supercomputers. We introduce RADICAL-Pilot (RP) as a portable, modular and extensible Python-based Pilot system. We describe RP&#39;s design, architecture and implementation. We characterize its performance and show its ability to scalably execute workloads comprised of thousands of MPI tasks on Titan--a DOE leadership class facility. Specifically, we investigate RP&#39;s weak (strong) scaling properties up to 131K (65K) cores and 4096 (16384) 32 core tasks. RADICAL-Pilot can be used stand-alone, as well as integrated with other tools as a runtime system.
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1801.01843v1-abstract-full" onclick="document.getElementById('1801.01843v1-abstract-full').style.display = 'none'; document.getElementById('1801.01843v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
    </span>
  </p>
  <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 January, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2018.
</p>
</li>
<!-- Search result: arXiv:1609.03484 (the two closing tags above end the preceding result). -->
<li class="arxiv-result">
  <div class="is-marginless">
    <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1609.03484">arXiv:1609.03484</a>
      <span>&nbsp;[<a href="https://arxiv.org/pdf/1609.03484">pdf</a>, <a href="https://arxiv.org/format/1609.03484">other</a>]&nbsp;</span>
    </p>
    <div class="tags is-inline-block">
      <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span>
    </div>
  </div>
  <p class="title is-5 mathjax"> Designing Workflow Systems Using Building Blocks </p>
  <p class="authors">
    <span class="search-hit">Authors:</span>
    <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>,
    <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>,
    <a href="/search/cs?searchtype=author&amp;query=Balasubramanian%2C+V">Vivek Balasubramanian</a>,
    <a href="/search/cs?searchtype=author&amp;query=Maldonado%2C+M">Manuel Maldonado</a>,
    <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>
  </p>
  <p class="abstract mathjax">
    <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
    <!-- Truncated abstract, visible by default. -->
    <span class="abstract-short has-text-grey-dark mathjax" id="1609.03484v3-abstract-short" style="display: inline;">
      We suggest there is a need for a fresh perspective on the design and development of workflow systems and argue for a building blocks approach. We outline a description of this approach and define the properties of software building blocks.
      We discuss RADICAL-Cybertools as one implementation of the building blocks concept, showing how they have been designed and developed in accordance with this ap&hellip;
      <!-- Toggle: an anchor without href is not focusable, so role, tabindex and a key handler make it keyboard-operable. -->
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1609.03484v3-abstract-full" onclick="document.getElementById('1609.03484v3-abstract-full').style.display = 'inline'; document.getElementById('1609.03484v3-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
    </span>
    <!-- Full abstract, hidden until the More toggle is activated. -->
    <span class="abstract-full has-text-grey-dark mathjax" id="1609.03484v3-abstract-full" style="display: none;">
      We suggest there is a need for a fresh perspective on the design and development of workflow systems and argue for a building blocks approach. We outline a description of this approach and define the properties of software building blocks. We discuss RADICAL-Cybertools as one implementation of the building blocks concept, showing how they have been designed and developed in accordance with this approach. Four case studies are presented, covering a dozen science problems. We discuss how RADICAL-Cybertools have been used to develop new workflow systems capabilities and integrated to enhance existing ones, illustrating the applicability and potential of software building blocks. In doing so, we have begun an investigation of an alternative approach to thinking about the design and implementation of workflow systems.
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1609.03484v3-abstract-full" onclick="document.getElementById('1609.03484v3-abstract-full').style.display = 'none'; document.getElementById('1609.03484v3-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
    </span>
  </p>
  <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 April, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 12 September, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2016.
</p>
</li>
<!-- Search result: arXiv:1605.09513 (the two closing tags above end the preceding result). -->
<li class="arxiv-result">
  <div class="is-marginless">
    <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1605.09513">arXiv:1605.09513</a>
      <span>&nbsp;[<a href="https://arxiv.org/pdf/1605.09513">pdf</a>, <a href="https://arxiv.org/format/1605.09513">other</a>]&nbsp;</span>
    </p>
    <div class="tags is-inline-block">
      <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span>
    </div>
    <div class="is-inline-block" style="margin-left: 0.5rem">
      <div class="tags has-addons">
        <span class="tag is-dark is-size-7">doi</span>
        <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/eScience.2017.41">10.1109/eScience.2017.41 <i class="fa fa-external-link" aria-hidden="true"></i></a></span>
      </div>
    </div>
  </div>
  <p class="title is-5 mathjax"> Evaluating Distributed Execution of Workloads </p>
  <p class="authors">
    <span class="search-hit">Authors:</span>
    <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>,
    <a href="/search/cs?searchtype=author&amp;query=Babuji%2C+Y+N">Yadu Nand Babuji</a>,
    <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>,
    <a href="/search/cs?searchtype=author&amp;query=Ha%2C+M+T">Ming Tai Ha</a>,
    <a href="/search/cs?searchtype=author&amp;query=Wilde%2C+M">Michael Wilde</a>,
    <a href="/search/cs?searchtype=author&amp;query=Katz%2C+D+S">Daniel S. Katz</a>,
    <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>
  </p>
  <p class="abstract mathjax">
    <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
    <!-- Truncated abstract, visible by default. -->
    <span class="abstract-short has-text-grey-dark mathjax" id="1605.09513v3-abstract-short" style="display: inline;">
      Resource selection and task placement for distributed execution poses conceptual and implementation difficulties.
      Although resource selection and task placement are at the core of many tools and workflow systems, the methods are ad hoc rather than being based on models. Consequently, partial and non-interoperable implementations proliferate. We address both the conceptual and implementation diffic&hellip;
      <!-- Toggle: an anchor without href is not focusable, so role, tabindex and a key handler make it keyboard-operable. -->
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1605.09513v3-abstract-full" onclick="document.getElementById('1605.09513v3-abstract-full').style.display = 'inline'; document.getElementById('1605.09513v3-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
    </span>
    <!-- Full abstract, hidden until the More toggle is activated. -->
    <span class="abstract-full has-text-grey-dark mathjax" id="1605.09513v3-abstract-full" style="display: none;">
      Resource selection and task placement for distributed execution poses conceptual and implementation difficulties. Although resource selection and task placement are at the core of many tools and workflow systems, the methods are ad hoc rather than being based on models. Consequently, partial and non-interoperable implementations proliferate. We address both the conceptual and implementation difficulties by experimentally characterizing diverse modalities of resource selection and task placement. We compare the architectures and capabilities of two systems: the AIMES middleware and Swift workflow scripting language and runtime. We integrate these systems to enable the distributed execution of Swift workflows on Pilot-Jobs managed by the AIMES middleware. Our experiments characterize and compare alternative execution strategies by measuring the time to completion of heterogeneous uncoupled workloads executed at diverse scale and on multiple resources. We measure the adverse effects of pilot fragmentation and early binding of tasks to resources and the benefits of backfill scheduling across pilots on multiple resources. We then use this insight to execute a multi-stage workflow across five production-grade resources. We discuss the importance and implications for other tools and workflow systems.
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1605.09513v3-abstract-full" onclick="document.getElementById('1605.09513v3-abstract-full').style.display = 'none'; document.getElementById('1605.09513v3-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
    </span>
  </p>
  <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 November, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 May, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2016. </p>
</li>
<!-- Search result: arXiv:1601.05439 -->
<li class="arxiv-result">
  <div class="is-marginless">
    <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1601.05439">arXiv:1601.05439</a>
      <span>&nbsp;[<a href="https://arxiv.org/pdf/1601.05439">pdf</a>, <a href="https://arxiv.org/format/1601.05439">other</a>]&nbsp;</span>
    </p>
    <div class="tags is-inline-block">
      <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span>
    </div>
  </div>
  <p class="title is-5 mathjax"> RepEx: A Flexible Framework for Scalable Replica Exchange Molecular Dynamics Simulations </p>
  <p class="authors">
    <span class="search-hit">Authors:</span>
    <a href="/search/cs?searchtype=author&amp;query=Treikalis%2C+A">Antons Treikalis</a>,
    <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>,
    <a href="/search/cs?searchtype=author&amp;query=Chen%2C+H">Haoyuan Chen</a>,
    <a href="/search/cs?searchtype=author&amp;query=Lee%2C+T">Tai-Sung Lee</a>,
    <a href="/search/cs?searchtype=author&amp;query=York%2C+D+M">Darrin M.
York</a>,
    <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>
  </p>
  <p class="abstract mathjax">
    <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
    <!-- Truncated abstract, visible by default. -->
    <span class="abstract-short has-text-grey-dark mathjax" id="1601.05439v1-abstract-short" style="display: inline;">
      Replica Exchange (RE) simulations have emerged as an important algorithmic tool for the molecular sciences. RE simulations involve the concurrent execution of independent simulations which infrequently interact and exchange information. The next set of simulation parameters are based upon the outcome of the exchanges. Typically RE functionality is integrated into the molecular simulation softwar&hellip;
      <!-- Toggle: an anchor without href is not focusable, so role, tabindex and a key handler make it keyboard-operable. -->
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1601.05439v1-abstract-full" onclick="document.getElementById('1601.05439v1-abstract-full').style.display = 'inline'; document.getElementById('1601.05439v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
    </span>
    <!-- Full abstract, hidden until the More toggle is activated. -->
    <span class="abstract-full has-text-grey-dark mathjax" id="1601.05439v1-abstract-full" style="display: none;">
      Replica Exchange (RE) simulations have emerged as an important algorithmic tool for the molecular sciences. RE simulations involve the concurrent execution of independent simulations which infrequently interact and exchange information. The next set of simulation parameters are based upon the outcome of the exchanges. Typically RE functionality is integrated into the molecular simulation software package. A primary motivation of the tight integration of RE functionality with simulation codes has been performance. This is limiting at multiple levels. First, advances in the RE methodology are tied to the molecular simulation code. Consequently these advances remain confined to the molecular simulation code for which they were developed. Second, it is difficult to extend or experiment with novel RE algorithms, since expertise in the molecular simulation code is typically required.
      In this paper, we propose the RepEx framework which address these aforementioned shortcomings of existing approaches, while striking the balance between flexibility (any RE scheme) and scalability (tens of thousands of replicas) over a diverse range of platforms. RepEx is designed to use a pilot-job based runtime system and support diverse RE Patterns and Execution Modes. RE Patterns are concerned with synchronization mechanisms in RE simulation, and Execution Modes with spatial and temporal mapping of workload to the CPU cores. We discuss how the design and implementation yield the following primary contributions of the RepEx framework: (i) its ability to support different RE schemes independent of molecular simulation codes, (ii) provide the ability to execute different exchange schemes and replica counts independent of the specific availability of resources, (iii) provide a runtime system that has first-class support for task-level parallelism, and (iv) required scalability along multiple dimensions.
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1601.05439v1-abstract-full" onclick="document.getElementById('1601.05439v1-abstract-full').style.display = 'none'; document.getElementById('1601.05439v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
    </span>
  </p>
  <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 January, 2016; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2016.
</p>
  <p class="comments is-size-7">
    <span class="has-text-black-bis has-text-weight-semibold">Comments:</span>
    <span class="has-text-grey-dark mathjax">12 pages, 13 figures</span>
  </p>
</li>
<!-- Search result: arXiv:1512.08194 -->
<li class="arxiv-result">
  <div class="is-marginless">
    <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1512.08194">arXiv:1512.08194</a>
      <span>&nbsp;[<a href="https://arxiv.org/pdf/1512.08194">pdf</a>, <a href="https://arxiv.org/format/1512.08194">other</a>]&nbsp;</span>
    </p>
    <div class="tags is-inline-block">
      <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span>
    </div>
  </div>
  <p class="title is-5 mathjax"> Using Pilot Systems to Execute Many Task Workloads on Supercomputers </p>
  <p class="authors">
    <span class="search-hit">Authors:</span>
    <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>,
    <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>,
    <a href="/search/cs?searchtype=author&amp;query=Maldonado%2C+M">Manuel Maldonado</a>,
    <a href="/search/cs?searchtype=author&amp;query=Santcroos%2C+M">Mark Santcroos</a>,
    <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>
  </p>
  <p class="abstract mathjax">
    <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
    <!-- Truncated abstract, visible by default. -->
    <span class="abstract-short has-text-grey-dark mathjax" id="1512.08194v4-abstract-short" style="display: inline;">
      High performance computing systems have historically been designed to support applications comprised of mostly monolithic, single-job workloads. Pilot systems decouple workload specification, resource selection, and task execution via job placeholders and late-binding. Pilot systems help to satisfy the resource requirements of workloads comprised of multiple tasks.
      RADICAL-Pilot (RP) is a modular&hellip;
      <!-- Toggle: an anchor without href is not focusable, so role, tabindex and a key handler make it keyboard-operable. -->
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1512.08194v4-abstract-full" onclick="document.getElementById('1512.08194v4-abstract-full').style.display = 'inline'; document.getElementById('1512.08194v4-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
    </span>
    <!-- Full abstract, hidden until the More toggle is activated. -->
    <span class="abstract-full has-text-grey-dark mathjax" id="1512.08194v4-abstract-full" style="display: none;">
      High performance computing systems have historically been designed to support applications comprised of mostly monolithic, single-job workloads. Pilot systems decouple workload specification, resource selection, and task execution via job placeholders and late-binding. Pilot systems help to satisfy the resource requirements of workloads comprised of multiple tasks. RADICAL-Pilot (RP) is a modular and extensible Python-based pilot system. In this paper we describe RP&#39;s design, architecture and implementation, and characterize its performance. RP is capable of spawning more than 100 tasks/second and supports the steady-state execution of up to 16K concurrent tasks. RP can be used stand-alone, as well as integrated with other application-level tools as a runtime system.
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1512.08194v4-abstract-full" onclick="document.getElementById('1512.08194v4-abstract-full').style.display = 'none'; document.getElementById('1512.08194v4-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
    </span>
  </p>
  <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 30 July, 2018; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 December, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2015.
</p>
</li>
<!-- Search result: arXiv:1506.00272 (the two closing tags above end the preceding result). -->
<li class="arxiv-result">
  <div class="is-marginless">
    <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1506.00272">arXiv:1506.00272</a>
      <span>&nbsp;[<a href="https://arxiv.org/pdf/1506.00272">pdf</a>, <a href="https://arxiv.org/format/1506.00272">other</a>]&nbsp;</span>
    </p>
    <div class="tags is-inline-block">
      <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span>
    </div>
  </div>
  <p class="title is-5 mathjax"> Synapse: Synthetic Application Profiler and Emulator </p>
  <p class="authors">
    <span class="search-hit">Authors:</span>
    <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>,
    <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>
  </p>
  <p class="abstract mathjax">
    <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
    <!-- Truncated abstract, visible by default. -->
    <span class="abstract-short has-text-grey-dark mathjax" id="1506.00272v3-abstract-short" style="display: inline;">
      We introduce Synapse motivated by the needs to estimate and emulate workload execution characteristics on high-performance and distributed heterogeneous resources. Synapse has a platform independent application profiler, and the ability to emulate profiled workloads on a variety of heterogeneous resources. Synapse is used as a proxy application (or &#34;representative application&#34;) for real workloads,&hellip;
      <!-- Toggle: an anchor without href is not focusable, so role, tabindex and a key handler make it keyboard-operable. -->
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1506.00272v3-abstract-full" onclick="document.getElementById('1506.00272v3-abstract-full').style.display = 'inline'; document.getElementById('1506.00272v3-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
    </span>
    <!-- Full abstract, hidden until the More toggle is activated. -->
    <span class="abstract-full has-text-grey-dark mathjax" id="1506.00272v3-abstract-full" style="display: none;">
      We introduce Synapse motivated by the needs to estimate and emulate workload execution characteristics on high-performance and distributed heterogeneous resources.
      Synapse has a platform independent application profiler, and the ability to emulate profiled workloads on a variety of heterogeneous resources. Synapse is used as a proxy application (or &#34;representative application&#34;) for real workloads, with the added advantage that it can be tuned at arbitrary levels of granularity in ways that are simply not possible using real applications. Experiments show that automated profiling using Synapse represents application characteristics with high fidelity. Emulation using Synapse can reproduce the application behavior in the original runtime environment, as well as reproducing properties when used in a different run-time environments.
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1506.00272v3-abstract-full" onclick="document.getElementById('1506.00272v3-abstract-full').style.display = 'none'; document.getElementById('1506.00272v3-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
    </span>
  </p>
  <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 February, 2016; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 31 May, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2015.
</p>
  <p class="comments is-size-7">
    <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2016 IEEE International Parallel and Distributed Processing Symposium Workshops, Chicago, IL, USA, May 23-27, 2016
  </p>
</li>
<!-- Search result: arXiv:1504.04720 -->
<li class="arxiv-result">
  <div class="is-marginless">
    <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1504.04720">arXiv:1504.04720</a>
      <span>&nbsp;[<a href="https://arxiv.org/pdf/1504.04720">pdf</a>, <a href="https://arxiv.org/format/1504.04720">other</a>]&nbsp;</span>
    </p>
    <div class="tags is-inline-block">
      <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span>
    </div>
  </div>
  <p class="title is-5 mathjax"> Integrating Abstractions to Enhance the Execution of Distributed Applications </p>
  <p class="authors">
    <span class="search-hit">Authors:</span>
    <a href="/search/cs?searchtype=author&amp;query=Turilli%2C+M">Matteo Turilli</a>,
    <a href="/search/cs?searchtype=author&amp;query=Liu%2C+F">Feng Liu</a>,
    <a href="/search/cs?searchtype=author&amp;query=Zhang%2C+Z">Zhao Zhang</a>,
    <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>,
    <a href="/search/cs?searchtype=author&amp;query=Wilde%2C+M">Michael Wilde</a>,
    <a href="/search/cs?searchtype=author&amp;query=Weissman%2C+J">Jon Weissman</a>,
    <a href="/search/cs?searchtype=author&amp;query=Katz%2C+D+S">Daniel S. Katz</a>,
    <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a>
  </p>
  <p class="abstract mathjax">
    <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
    <!-- Truncated abstract, visible by default. -->
    <span class="abstract-short has-text-grey-dark mathjax" id="1504.04720v2-abstract-short" style="display: inline;">
      One of the factors that limits the scale, performance, and sophistication of distributed applications is the difficulty of concurrently executing them on multiple distributed computing resources.
      In part, this is due to a poor understanding of the general properties and performance of the coupling between applications and dynamic resources. This paper addresses this issue by integrating abstractio&hellip;
      <!-- Toggle: an anchor without href is not focusable, so role, tabindex and a key handler make it keyboard-operable. -->
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1504.04720v2-abstract-full" onclick="document.getElementById('1504.04720v2-abstract-full').style.display = 'inline'; document.getElementById('1504.04720v2-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
    </span>
    <!-- Full abstract, hidden until the More toggle is activated. -->
    <span class="abstract-full has-text-grey-dark mathjax" id="1504.04720v2-abstract-full" style="display: none;">
      One of the factors that limits the scale, performance, and sophistication of distributed applications is the difficulty of concurrently executing them on multiple distributed computing resources. In part, this is due to a poor understanding of the general properties and performance of the coupling between applications and dynamic resources. This paper addresses this issue by integrating abstractions representing distributed applications, resources, and execution processes into a pilot-based middleware. The middleware provides a platform that can specify distributed applications, execute them on multiple resource and for different configurations, and is instrumented to support investigative analysis. We analyzed the execution of distributed applications using experiments that measure the benefits of using multiple resources, the late-binding of scheduling decisions, and the use of backfill scheduling.
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1504.04720v2-abstract-full" onclick="document.getElementById('1504.04720v2-abstract-full').style.display = 'none'; document.getElementById('1504.04720v2-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
    </span>
  </p>
  <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 February, 2016; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 April, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2015. </p>
</li>
<!-- Search result: arXiv:1210.3271 -->
<li class="arxiv-result">
  <div class="is-marginless">
    <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1210.3271">arXiv:1210.3271</a>
      <span>&nbsp;[<a href="https://arxiv.org/pdf/1210.3271">pdf</a>]&nbsp;</span>
    </p>
    <div class="tags is-inline-block">
      <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span>
      <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span>
    </div>
  </div>
  <p class="title is-5 mathjax"> Grid Computing: The Next Decade -- Report and Summary </p>
  <p class="authors">
    <span class="search-hit">Authors:</span>
    <a href="/search/cs?searchtype=author&amp;query=Nabrzyski%2C+J">Jarek Nabrzyski</a>,
    <a href="/search/cs?searchtype=author&amp;query=Kurowski%2C+K">Krzysztof Kurowski</a>,
    <a href="/search/cs?searchtype=author&amp;query=Katz%2C+D+S">Daniel S. Katz</a>,
    <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>
  </p>
  <p class="abstract mathjax">
    <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>:
    <!-- Truncated abstract, visible by default. -->
    <span class="abstract-short has-text-grey-dark mathjax" id="1210.3271v1-abstract-short" style="display: inline;">
      The evolution of the global scientific cyberinfrastructure (CI) has, over the last 10+ years, led to a large diversity of CI instances.
While specialized, competing and alternative CI building blocks are inherent to a healthy ecosystem, it also becomes apparent that the increasing degree of fragmentation is hindering interoperation, and thus limiting collaboration, which is essential for modern sc&hellip;
      <!-- Toggle: an anchor without href is not focusable, so role, tabindex and a key handler make it keyboard-operable. -->
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1210.3271v1-abstract-full" onclick="document.getElementById('1210.3271v1-abstract-full').style.display = 'inline'; document.getElementById('1210.3271v1-abstract-short').style.display = 'none';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9661; More</a>
    </span>
    <!-- Full abstract, hidden until the More toggle is activated. -->
    <span class="abstract-full has-text-grey-dark mathjax" id="1210.3271v1-abstract-full" style="display: none;">
      The evolution of the global scientific cyberinfrastructure (CI) has, over the last 10+ years, led to a large diversity of CI instances. While specialized, competing and alternative CI building blocks are inherent to a healthy ecosystem, it also becomes apparent that the increasing degree of fragmentation is hindering interoperation, and thus limiting collaboration, which is essential for modern science communities often spanning international groups and multiple disciplines (but even &#39;small sciences&#39;, with smaller and localized communities, are often embedded into the larger scientific ecosystem, and are increasingly dependent on the availability of CI.) There are different reasons why fragmentation occurs, on technical and social level. But also, it is apparent that the current funding model for creating CI components largely fails to aid the transition from research to production, by mixing CS research and IT engineering challenges into the same funding strategies.
      The 10th anniversary of the EU funded project &#39;Grid Lab&#39; (which was an early and ambitious attempt on providing a consolidated and science oriented cyberinfrastructure software stack to a specific science community) was taken as an opportunity to invite international leaders and early stage researchers in grid computing and e-Science from Europe, America and Asia, and, together with representatives of the EU and US funding agencies, to discuss the fundamental aspects of CI evolution, and to contemplate the options for a more coherent, more coordinated approach to the global evolution of CI. This open document represents the results of that workshop - including a draft of a mission statement and a proposal for a blueprint process - to inform the wider community as well as to encourage external experts to provide their feedback and comments.
      <a class="is-size-7" style="white-space: nowrap;" role="button" tabindex="0" aria-controls="1210.3271v1-abstract-full" onclick="document.getElementById('1210.3271v1-abstract-full').style.display = 'none'; document.getElementById('1210.3271v1-abstract-short').style.display = 'inline';" onkeydown="if (event.key === 'Enter' || event.key === ' ') { event.preventDefault(); this.click(); }">&#9651; Less</a>
    </span>
  </p>
  <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2012.
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">17 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1207.6644">arXiv:1207.6644</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/1207.6644">pdf</a>, <a href="https://arxiv.org/format/1207.6644">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> P*: A Model of Pilot-Abstractions </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Luckow%2C+A">Andre Luckow</a>, <a href="/search/cs?searchtype=author&amp;query=Santcroos%2C+M">Mark Santcroos</a>, <a href="/search/cs?searchtype=author&amp;query=Weidner%2C+O">Ole Weidner</a>, <a href="/search/cs?searchtype=author&amp;query=Merzky%2C+A">Andre Merzky</a>, <a href="/search/cs?searchtype=author&amp;query=Mantha%2C+P">Pradeep Mantha</a>, <a href="/search/cs?searchtype=author&amp;query=Jha%2C+S">Shantenu Jha</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1207.6644v1-abstract-short" style="display: inline;"> Pilot-Jobs support effective distributed resource utilization, and are arguably one of the most widely-used distributed computing abstractions - as measured by the number and types of applications that use them, as well as the number of production distributed cyberinfrastructures that support them. 
In spite of broad uptake, there does not exist a well-defined, unifying conceptual model of Pilot-Jo&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1207.6644v1-abstract-full').style.display = 'inline'; document.getElementById('1207.6644v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1207.6644v1-abstract-full" style="display: none;"> Pilot-Jobs support effective distributed resource utilization, and are arguably one of the most widely-used distributed computing abstractions - as measured by the number and types of applications that use them, as well as the number of production distributed cyberinfrastructures that support them. In spite of broad uptake, there does not exist a well-defined, unifying conceptual model of Pilot-Jobs which can be used to define, compare and contrast different implementations. Often Pilot-Job implementations are strongly coupled to the distributed cyber-infrastructure they were originally designed for. These factors present a barrier to extensibility and interoperability. This paper is an attempt to (i) provide a minimal but complete model (P*) of Pilot-Jobs, (ii) establish the generality of the P* Model by mapping various existing and well known Pilot-Job frameworks such as Condor and DIANE to P*, (iii) derive an interoperable and extensible API for the P* Model (Pilot-API), (iv) validate the implementation of the Pilot-API by concurrently using multiple distinct Pilot-Job frameworks on distinct production distributed cyberinfrastructures, and (v) apply the P* Model to Pilot-Data. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1207.6644v1-abstract-full').style.display = 'none'; document.getElementById('1207.6644v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p>
<p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 July, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2012. </p>
<p class="comments is-size-7">
  <span class="has-text-black-bis has-text-weight-semibold">Comments:</span>
  <span class="has-text-grey-dark mathjax">10 pages</span>
</p>
</li>
</ol>
<div class="is-hidden-tablet">
  <!-- feedback for mobile only -->
  <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span>
</div>
</div>
</main>
<!--
  Page footer: secondary navigation laid out as two "MetaColumns", each holding
  two link lists. The inline SVG icons are decorative (every one sits next to
  visible link text), so they carry aria-hidden="true" to keep their
  title/desc text out of the accessibility tree.
-->
<footer>
  <div class="columns is-desktop" role="navigation" aria-label="Secondary">
    <!-- MetaColumn 1 -->
    <div class="column">
      <div class="columns">
        <div class="column">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/about">About</a></li>
            <li><a href="https://info.arxiv.org/help">Help</a></li>
          </ul>
        </div>
        <div class="column">
          <ul class="nav-spaced">
            <li>
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation" aria-hidden="true"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>
              <a href="https://info.arxiv.org/help/contact.html"> Contact</a>
            </li>
            <li>
              <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation" aria-hidden="true"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg>
              <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a>
            </li>
          </ul>
        </div>
      </div>
    </div>
    <!-- end MetaColumn 1 -->
    <!-- MetaColumn 2 -->
    <div class="column">
      <div class="columns">
        <div class="column">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li>
            <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li>
          </ul>
        </div>
        <div class="column sorry-app-links">
          <ul class="nav-spaced">
            <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li>
            <li>
              <!-- These links open a new browsing context; rel="noopener" keeps the opened page from reaching back through window.opener. -->
              <p class="help">
                <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank" rel="noopener">arXiv Operational Status
                  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation" aria-hidden="true"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br>
                Get status notifications via
                <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation" aria-hidden="true"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a>
                or
                <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank" rel="noopener"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation" aria-hidden="true"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a>
              </p>
            </li>
          </ul>
        </div>
      </div>
    </div>
    <!-- end MetaColumn 2 -->
  </div>
</footer>
<script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script>
</body>
</html>

Pages: 1 2 3 4 5 6 7 8 9 10