CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script 
src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. 
<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" 
role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–16 of 16 results for author: <span class="mathjax">Norris, B</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Norris%2C+B">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." 
type="text" value="Norris, B"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Norris%2C+B&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option 
value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Norris, B"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. 
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.03303">arXiv:2401.03303</a> <span> [<a href="https://arxiv.org/pdf/2401.03303">pdf</a>, <a href="https://arxiv.org/format/2401.03303">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Guiding Effort Allocation in Open-Source Software Projects Using Bus Factor Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lisan%2C+A">Aliza Lisan</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.03303v1-abstract-short" style="display: inline;"> A critical issue faced by open-source software projects is the risk of key personnel leaving the project. This risk is exacerbated in large projects that have been under development for a long time and experienced growth in their development teams. 
One way to quantify this risk is to measure the concentration of knowledge about the project among its developers. Formally known as the Bus Factor (BF… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03303v1-abstract-full').style.display = 'inline'; document.getElementById('2401.03303v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.03303v1-abstract-full" style="display: none;"> A critical issue faced by open-source software projects is the risk of key personnel leaving the project. This risk is exacerbated in large projects that have been under development for a long time and experienced growth in their development teams. One way to quantify this risk is to measure the concentration of knowledge about the project among its developers. Formally known as the Bus Factor (BF) of a project and defined as 'the number of key developers who would need to be incapacitated to make a project unable to proceed'. Most of the proposed algorithms for BF calculation measure a developer's knowledge of a file based on the number of commits. In this work, we propose using other metrics like lines of code changes (LOCC) and cosine difference of lines of code (change-size-cos) to calculate the BF. We use these metrics for BF calculation for five open-source GitHub projects using the CST algorithm and the RIG algorithm, which is git-blame-based. Moreover, we calculate the BF on project sub-directories that have seen the most active development recently. Lastly, we compare the results of the two algorithms in accuracy, similarity in results, execution time, and trends in BF values over time. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.03303v1-abstract-full').style.display = 'none'; document.getElementById('2401.03303v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.02010">arXiv:2311.02010</a> <span> [<a href="https://arxiv.org/pdf/2311.02010">pdf</a>, <a href="https://arxiv.org/format/2311.02010">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> A cast of thousands: How the IDEAS Productivity project has advanced software productivity and sustainability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=McInnes%2C+L+C">Lois Curfman McInnes</a>, <a href="/search/cs?searchtype=author&query=Heroux%2C+M">Michael Heroux</a>, <a href="/search/cs?searchtype=author&query=Bernholdt%2C+D+E">David E. Bernholdt</a>, <a href="/search/cs?searchtype=author&query=Dubey%2C+A">Anshu Dubey</a>, <a href="/search/cs?searchtype=author&query=Gonsiorowski%2C+E">Elsa Gonsiorowski</a>, <a href="/search/cs?searchtype=author&query=Gupta%2C+R">Rinku Gupta</a>, <a href="/search/cs?searchtype=author&query=Marques%2C+O">Osni Marques</a>, <a href="/search/cs?searchtype=author&query=Moulton%2C+J+D">J. David Moulton</a>, <a href="/search/cs?searchtype=author&query=Nam%2C+H+A">Hai Ah Nam</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a>, <a href="/search/cs?searchtype=author&query=Raybourn%2C+E+M">Elaine M. 
Raybourn</a>, <a href="/search/cs?searchtype=author&query=Willenbring%2C+J">Jim Willenbring</a>, <a href="/search/cs?searchtype=author&query=Almgren%2C+A">Ann Almgren</a>, <a href="/search/cs?searchtype=author&query=Bartlett%2C+R">Ross Bartlett</a>, <a href="/search/cs?searchtype=author&query=Cranfill%2C+K">Kita Cranfill</a>, <a href="/search/cs?searchtype=author&query=Fickas%2C+S">Stephen Fickas</a>, <a href="/search/cs?searchtype=author&query=Frederick%2C+D">Don Frederick</a>, <a href="/search/cs?searchtype=author&query=Godoy%2C+W">William Godoy</a>, <a href="/search/cs?searchtype=author&query=Grubel%2C+P">Patricia Grubel</a>, <a href="/search/cs?searchtype=author&query=Hartman-Baker%2C+R">Rebecca Hartman-Baker</a>, <a href="/search/cs?searchtype=author&query=Huebl%2C+A">Axel Huebl</a>, <a href="/search/cs?searchtype=author&query=Lynch%2C+R">Rose Lynch</a>, <a href="/search/cs?searchtype=author&query=Thakur%2C+A+M">Addi Malviya Thakur</a>, <a href="/search/cs?searchtype=author&query=Milewicz%2C+R">Reed Milewicz</a>, <a href="/search/cs?searchtype=author&query=Miller%2C+M+C">Mark C. Miller</a> , et al. (9 additional authors not shown) </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.02010v2-abstract-short" style="display: inline;"> Computational and data-enabled science and engineering are revolutionizing advances throughout science and society, at all scales of computing. For example, teams in the U.S. 
DOE Exascale Computing Project have been tackling new frontiers in modeling, simulation, and analysis by exploiting unprecedented exascale computing capabilities-building an advanced software ecosystem that supports next-gene… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.02010v2-abstract-full').style.display = 'inline'; document.getElementById('2311.02010v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.02010v2-abstract-full" style="display: none;"> Computational and data-enabled science and engineering are revolutionizing advances throughout science and society, at all scales of computing. For example, teams in the U.S. DOE Exascale Computing Project have been tackling new frontiers in modeling, simulation, and analysis by exploiting unprecedented exascale computing capabilities-building an advanced software ecosystem that supports next-generation applications and addresses disruptive changes in computer architectures. However, concerns are growing about the productivity of the developers of scientific software, its sustainability, and the trustworthiness of the results that it produces. Members of the IDEAS project serve as catalysts to address these challenges through fostering software communities, incubating and curating methodologies and resources, and disseminating knowledge to advance developer productivity and software sustainability. This paper discusses how these synergistic activities are advancing scientific discovery-mitigating technical risks by building a firmer foundation for reproducible, sustainable science at all scales of computing, from laptops to clusters to exascale and beyond. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.02010v2-abstract-full').style.display = 'none'; document.getElementById('2311.02010v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 February, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 3 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">12 pages, 1 figure</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2203.07904">arXiv:2203.07904</a> <span> [<a href="https://arxiv.org/pdf/2203.07904">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Image and Video Processing">eess.IV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Unsupervised Learning Based Focal Stack Camera Depth Estimation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhengyu Huang</a>, <a href="/search/cs?searchtype=author&query=Du%2C+W">Weizhi Du</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+T+B">Theodore B. 
Norris</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2203.07904v2-abstract-short" style="display: inline;"> We propose an unsupervised deep learning based method to estimate depth from focal stack camera images. On the NYU-v2 dataset, our method achieves much better depth estimation accuracy compared to single-image based methods. </span> <span class="abstract-full has-text-grey-dark mathjax" id="2203.07904v2-abstract-full" style="display: none;"> We propose an unsupervised deep learning based method to estimate depth from focal stack camera images. On the NYU-v2 dataset, our method achieves much better depth estimation accuracy compared to single-image based methods. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2203.07904v2-abstract-full').style.display = 'none'; document.getElementById('2203.07904v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 March, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2022. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> in Conference on Lasers and Electro-Optics, Technical Digest Series (Optica Publishing Group, 2022), paper JW3A.5 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2103.03922">arXiv:2103.03922</a> <span> [<a href="https://arxiv.org/pdf/2103.03922">pdf</a>, <a href="https://arxiv.org/format/2103.03922">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> </div> <p class="title is-5 mathjax"> ES-Net: An Efficient Stereo Matching Network </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Huang%2C+Z">Zhengyu Huang</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+T+B">Theodore B. Norris</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+P">Panqu Wang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2103.03922v1-abstract-short" style="display: inline;"> Dense stereo matching with deep neural networks is of great interest to the research community. Existing stereo matching networks typically use slow and computationally expensive 3D convolutions to improve the performance, which is not friendly to real-world applications such as autonomous driving. 
In this paper, we propose the Efficient Stereo Network (ESNet), which achieves high performance and… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.03922v1-abstract-full').style.display = 'inline'; document.getElementById('2103.03922v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2103.03922v1-abstract-full" style="display: none;"> Dense stereo matching with deep neural networks is of great interest to the research community. Existing stereo matching networks typically use slow and computationally expensive 3D convolutions to improve the performance, which is not friendly to real-world applications such as autonomous driving. In this paper, we propose the Efficient Stereo Network (ESNet), which achieves high performance and efficient inference at the same time. ESNet relies only on 2D convolution and computes multi-scale cost volume efficiently using a warping-based method to improve the performance in regions with fine-details. In addition, we address the matching ambiguity issue in the occluded region by proposing ESNet-M, a variant of ESNet that additionally estimates an occlusion mask without supervision. We further improve the network performance by proposing a new training scheme that includes dataset scheduling and unsupervised pre-training. Compared with other low-cost dense stereo depth estimation methods, our proposed approach achieves state-of-the-art performance on the Scene Flow [1], DrivingStereo [2], and KITTI-2015 dataset [3]. Our code will be made available. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2103.03922v1-abstract-full').style.display = 'none'; document.getElementById('2103.03922v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 March, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Submitted to IROS 2021</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2101.11489">arXiv:2101.11489</a> <span> [<a href="https://arxiv.org/pdf/2101.11489">pdf</a>, <a href="https://arxiv.org/format/2101.11489">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="High Energy Physics - Experiment">hep-ex</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> Parallelizing the Unpacking and Clustering of Detector Data for Reconstruction of Charged Particle Tracks on Multi-core CPUs and Many-core GPUs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Cerati%2C+G">Giuseppe Cerati</a>, <a href="/search/cs?searchtype=author&query=Elmer%2C+P">Peter Elmer</a>, <a href="/search/cs?searchtype=author&query=Gravelle%2C+B">Brian Gravelle</a>, <a href="/search/cs?searchtype=author&query=Kortelainen%2C+M">Matti Kortelainen</a>, <a href="/search/cs?searchtype=author&query=Krutelyov%2C+V">Vyacheslav Krutelyov</a>, <a href="/search/cs?searchtype=author&query=Lantz%2C+S">Steven Lantz</a>, <a 
href="/search/cs?searchtype=author&query=Masciovecchio%2C+M">Mario Masciovecchio</a>, <a href="/search/cs?searchtype=author&query=McDermott%2C+K">Kevin McDermott</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a>, <a href="/search/cs?searchtype=author&query=Hall%2C+A+R">Allison Reinsvold Hall</a>, <a href="/search/cs?searchtype=author&query=Reid%2C+M">Micheal Reid</a>, <a href="/search/cs?searchtype=author&query=Riley%2C+D">Daniel Riley</a>, <a href="/search/cs?searchtype=author&query=Tadel%2C+M">Matevž Tadel</a>, <a href="/search/cs?searchtype=author&query=Wittich%2C+P">Peter Wittich</a>, <a href="/search/cs?searchtype=author&query=Wang%2C+B">Bei Wang</a>, <a href="/search/cs?searchtype=author&query=W%C3%BCrthwein%2C+F">Frank Würthwein</a>, <a href="/search/cs?searchtype=author&query=Yagil%2C+A">Avraham Yagil</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2101.11489v1-abstract-short" style="display: inline;"> We present results from parallelizing the unpacking and clustering steps of the raw data from the silicon strip modules for reconstruction of charged particle tracks. Throughput is further improved by concurrently processing multiple events using nested OpenMP parallelism on CPU or CUDA streams on GPU. The new implementation along with earlier work in developing a parallelized and vectorized imple… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2101.11489v1-abstract-full').style.display = 'inline'; document.getElementById('2101.11489v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2101.11489v1-abstract-full" style="display: none;"> We present results from parallelizing the unpacking and clustering steps of the raw data from the silicon strip modules for reconstruction of charged particle tracks. 
Throughput is further improved by concurrently processing multiple events using nested OpenMP parallelism on CPU or CUDA streams on GPU. The new implementation along with earlier work in developing a parallelized and vectorized implementation of the combinatoric Kalman filter algorithm has enabled efficient global reconstruction of the entire event on modern computer architectures. We demonstrate the performance of the new implementation on Intel Xeon and NVIDIA GPU architectures. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2101.11489v1-abstract-full').style.display = 'none'; document.getElementById('2101.11489v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 January, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2021. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.09473">arXiv:2006.09473</a> <span> [<a href="https://arxiv.org/pdf/2006.09473">pdf</a>, <a href="https://arxiv.org/format/2006.09473">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> Guiding Optimizations with Meliora: A Deep Walk down Memory Lane </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Meng%2C+K">Kewen Meng</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" 
id="2006.09473v1-abstract-short" style="display: inline;"> Performance models can be very useful for understanding the behavior of applications and hence can help guide design and optimization decisions. Unfortunately, performance modeling of nontrivial computations typically requires significant expertise and human effort. Moreover, even when performed by experts, it is necessarily limited in scope, accuracy, or both. However, since models are not typica… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.09473v1-abstract-full').style.display = 'inline'; document.getElementById('2006.09473v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.09473v1-abstract-full" style="display: none;"> Performance models can be very useful for understanding the behavior of applications and hence can help guide design and optimization decisions. Unfortunately, performance modeling of nontrivial computations typically requires significant expertise and human effort. Moreover, even when performed by experts, it is necessarily limited in scope, accuracy, or both. However, since models are not typically available, programmers, compilers or autotuners cannot use them easily to guide optimizations and are limited to heuristic-based methods that potentially take a lot of time to perform unnecessary transformations. We believe that streamlining model generation and making it scalable (both in terms of human effort and code size) would enable dramatic improvements in compilation techniques, as well as manual optimization and autotuning. To that end, we are building the Meliora code analysis infrastructure for machine learning-based performance model generation of arbitrary codes based on static analysis of intermediate language representations. 
We demonstrate good accuracy in matching known codes and show how Meliora can be used to optimize new codes through reusing optimization knowledge, either manually or in conjunction with an autotuner. When autotuning, Meliora eliminates or dramatically reduces the empirical search space, while generally achieving competitive performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.09473v1-abstract-full').style.display = 'none'; document.getElementById('2006.09473v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 8 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.04236">arXiv:2006.04236</a> <span> [<a href="https://arxiv.org/pdf/2006.04236">pdf</a>, <a href="https://arxiv.org/format/2006.04236">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/DataCom.2019.00029">10.1109/DataCom.2019.00029 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Distributed-Memory Vertex-Centric Network Embedding for Large-Scale Graphs </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Riazi%2C+S">Sara Riazi</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis 
has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.04236v1-abstract-short" style="display: inline;"> Network embedding is an important step in many different computations based on graph data. However, existing approaches are limited to small or middle size graphs with fewer than a million edges. In practice, web or social network graphs are orders of magnitude larger, thus making most current methods impractical for very large graphs. To address this problem, we introduce a new distributed-memory… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.04236v1-abstract-full').style.display = 'inline'; document.getElementById('2006.04236v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.04236v1-abstract-full" style="display: none;"> Network embedding is an important step in many different computations based on graph data. However, existing approaches are limited to small or middle size graphs with fewer than a million edges. In practice, web or social network graphs are orders of magnitude larger, thus making most current methods impractical for very large graphs. To address this problem, we introduce a new distributed-memory parallel network embedding method based on Apache Spark and GraphX. We demonstrate the scalability of our method as well as its ability to generate meaningful embeddings for vertex classification and link prediction on both real-world and synthetic graphs. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.04236v1-abstract-full').style.display = 'none'; document.getElementById('2006.04236v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2019 IEEE 5th International Conference on Big Data Intelligence and Computing (DATACOM)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1707.02423">arXiv:1707.02423</a> <span> [<a href="https://arxiv.org/pdf/1707.02423">pdf</a>, <a href="https://arxiv.org/format/1707.02423">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> A Similarity Measure for GPU Kernel Subgraph Matching </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lim%2C+R">Robert Lim</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a>, <a href="/search/cs?searchtype=author&query=Malony%2C+A">Allen Malony</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1707.02423v3-abstract-short" style="display: inline;"> Accelerator architectures specialize in executing SIMD (single instruction, multiple data) in lockstep. 
Because the majority of CUDA applications are parallelized loops, control flow information can provide an in-depth characterization of a kernel. CUDAflow is a tool that statically separates CUDA binaries into basic block regions and dynamically measures instruction and basic block frequencies. C… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1707.02423v3-abstract-full').style.display = 'inline'; document.getElementById('1707.02423v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1707.02423v3-abstract-full" style="display: none;"> Accelerator architectures specialize in executing SIMD (single instruction, multiple data) in lockstep. Because the majority of CUDA applications are parallelized loops, control flow information can provide an in-depth characterization of a kernel. CUDAflow is a tool that statically separates CUDA binaries into basic block regions and dynamically measures instruction and basic block frequencies. CUDAflow captures this information in a control flow graph (CFG) and performs subgraph matching across various kernel's CFGs to gain insights to an application's resource requirements, based on the shape and traversal of the graph, instruction operations executed and registers allocated, among other information. The utility of CUDAflow is demonstrated with SHOC and Rodinia application case studies on a variety of GPU architectures, revealing novel thread divergence characteristics that facilitates end users, autotuners and compilers in generating high performing code. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1707.02423v3-abstract-full').style.display = 'none'; document.getElementById('1707.02423v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 March, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 8 July, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 31st International Workshop on Languages and Compilers for Parallel Computing (LCPC), 2018 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1705.07575">arXiv:1705.07575</a> <span> [<a href="https://arxiv.org/pdf/1705.07575">pdf</a>, <a href="https://arxiv.org/format/1705.07575">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> Mira: A Framework for Static Performance Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Meng%2C+K">Kewen Meng</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1705.07575v1-abstract-short" style="display: inline;"> The performance model of an application can provide understanding about its runtime behavior on particular hardware. Such information can be analyzed by developers for performance tuning. 
However, model building and analyzing is frequently ignored during software development until performance problems arise because they require significant expertise and can involve many time-consuming applicat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1705.07575v1-abstract-full').style.display = 'inline'; document.getElementById('1705.07575v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1705.07575v1-abstract-full" style="display: none;"> The performance model of an application can provide understanding about its runtime behavior on particular hardware. Such information can be analyzed by developers for performance tuning. However, model building and analyzing is frequently ignored during software development until performance problems arise because they require significant expertise and can involve many time-consuming application runs. In this paper, we propose a fast, accurate, flexible and user-friendly tool, Mira, for generating performance models by applying static program analysis, targeting scientific applications running on supercomputers. We parse both the source code and binary to estimate performance attributes with better accuracy than considering just source or just binary code. Because our analysis is static, the target program does not need to be executed on the target architecture, which enables users to perform analysis on available machines instead of conducting expensive experiments on potentially expensive resources. Moreover, statically generated models enable performance prediction on non-existent or unavailable architectures. In addition to flexibility, because model generation time is significantly reduced compared to dynamic analysis approaches, our method is suitable for rapid application performance analysis and improvement. 
We present several scientific application validation results to demonstrate the current capabilities of our approach on small benchmarks and a mini application. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1705.07575v1-abstract-full').style.display = 'none'; document.getElementById('1705.07575v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 May, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2017. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1704.02003">arXiv:1704.02003</a> <span> [<a href="https://arxiv.org/pdf/1704.02003">pdf</a>, <a href="https://arxiv.org/format/1704.02003">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> </div> </div> <p class="title is-5 mathjax"> A Comparison of Parallel Graph Processing Implementations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Pollard%2C+S">Samuel Pollard</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1704.02003v2-abstract-short" style="display: inline;"> The rapidly growing number of large network analysis problems has led to the emergence of many parallel and distributed graph processing systems---one survey in 2014 identified over 80. 
Since then, the landscape has evolved; some packages have become inactive while more are being developed. Determining the best approach for a given problem is infeasible for most developers. To enable easy, rigorou… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1704.02003v2-abstract-full').style.display = 'inline'; document.getElementById('1704.02003v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1704.02003v2-abstract-full" style="display: none;"> The rapidly growing number of large network analysis problems has led to the emergence of many parallel and distributed graph processing systems---one survey in 2014 identified over 80. Since then, the landscape has evolved; some packages have become inactive while more are being developed. Determining the best approach for a given problem is infeasible for most developers. To enable easy, rigorous, and repeatable comparison of the capabilities of such systems, we present an approach and associated software for analyzing the performance and scalability of parallel, open-source graph libraries. We demonstrate our approach on five graph processing packages: GraphMat, the Graph500, the Graph Algorithm Platform Benchmark Suite, GraphBIG, and PowerGraph using synthetic and real-world datasets. We examine previously overlooked aspects of parallel graph processing performance such as phases of execution and energy usage for three algorithms: breadth first search, single source shortest paths, and PageRank and compare our results to Graphalytics. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1704.02003v2-abstract-full').style.display = 'none'; document.getElementById('1704.02003v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 May, 2017; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 6 April, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> April 2017. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 10 figures, Submitted to EuroPar 2017 and rejected. Revised and submitted to IEEE Cluster 2017</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1701.08547">arXiv:1701.08547</a> <span> [<a href="https://arxiv.org/pdf/1701.08547">pdf</a>, <a href="https://arxiv.org/format/1701.08547">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> Autotuning GPU Kernels via Static and Predictive Analysis </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Lim%2C+R+V">Robert V. Lim</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a>, <a href="/search/cs?searchtype=author&query=Malony%2C+A+D">Allen D. 
Malony</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1701.08547v3-abstract-short" style="display: inline;"> Optimizing the performance of GPU kernels is challenging for both human programmers and code generators. For example, CUDA programmers must set thread and block parameters for a kernel, but might not have the intuition to make a good choice. Similarly, compilers can generate working code, but may miss tuning opportunities by not targeting GPU models or performing code transformations. Although emp… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1701.08547v3-abstract-full').style.display = 'inline'; document.getElementById('1701.08547v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1701.08547v3-abstract-full" style="display: none;"> Optimizing the performance of GPU kernels is challenging for both human programmers and code generators. For example, CUDA programmers must set thread and block parameters for a kernel, but might not have the intuition to make a good choice. Similarly, compilers can generate working code, but may miss tuning opportunities by not targeting GPU models or performing code transformations. Although empirical autotuning addresses some of these challenges, it requires extensive experimentation and search for optimal code variants. This research presents an approach for tuning CUDA kernels based on static analysis that considers fine-grained code structure and the specific GPU architecture features. Notably, our approach does not require any program runs in order to discover near-optimal parameter settings. 
We demonstrate the applicability of our approach in enabling code autotuners such as Orio to produce competitive code variants comparable with empirical-based methods, without the high cost of experiments. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1701.08547v3-abstract-full').style.display = 'none'; document.getElementById('1701.08547v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 June, 2017; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 30 January, 2017; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2017. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1501.06223">arXiv:1501.06223</a> <span> [<a href="https://arxiv.org/pdf/1501.06223">pdf</a>, <a href="https://arxiv.org/format/1501.06223">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> A Roofline Visualization Framework </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Spear%2C+W">Wyatt Spear</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1501.06223v1-abstract-short" style="display: inline;"> The Roofline Model and its derivatives provide an intuitive representation of the best achievable performance on a given architecture. 
The Roofline Toolkit project is a collaboration among researchers at Argonne National Laboratory, Lawrence Berkeley National Laboratory, and the University of Oregon and consists of three main parts: hardware characterization, software characterization, and data ma… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1501.06223v1-abstract-full').style.display = 'inline'; document.getElementById('1501.06223v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1501.06223v1-abstract-full" style="display: none;"> The Roofline Model and its derivatives provide an intuitive representation of the best achievable performance on a given architecture. The Roofline Toolkit project is a collaboration among researchers at Argonne National Laboratory, Lawrence Berkeley National Laboratory, and the University of Oregon and consists of three main parts: hardware characterization, software characterization, and data manipulation and visualization interface. These components address the different aspects of performance data acquisition and manipulation required for performance analysis, modeling and optimization of codes on existing and emerging architectures. In this paper we introduce an initial implementation of the third component, a system for visualizing roofline charts and managing roofline performance analysis data. We discuss the implementation and rationale for the integration of the roofline visualization system into the Eclipse IDE. An overview of our continuing efforts and goals in the development of this project is provided. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1501.06223v1-abstract-full').style.display = 'none'; document.getElementById('1501.06223v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 25 January, 2015; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2015. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1408.1363">arXiv:1408.1363</a> <span> [<a href="https://arxiv.org/pdf/1408.1363">pdf</a>, <a href="https://arxiv.org/format/1408.1363">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Mathematical Software">cs.MS</span> </div> </div> <p class="title is-5 mathjax"> Lighthouse: A User-Centered Web Service for Linear Algebra Software </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a>, <a href="/search/cs?searchtype=author&query=Bernstein%2C+S">Sa-Lin Bernstein</a>, <a href="/search/cs?searchtype=author&query=Nair%2C+R">Ramya Nair</a>, <a href="/search/cs?searchtype=author&query=Jessup%2C+E">Elizabeth Jessup</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1408.1363v1-abstract-short" style="display: inline;"> Various fields of science and engineering rely on linear algebra for large scale data analysis, modeling and simulation, machine learning, and other applied problems. 
Linear algebra computations often dominate the execution time of such applications. Meanwhile, experts in these domains typically lack the training or time required to develop efficient, high-performance implementations of linear alg… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1408.1363v1-abstract-full').style.display = 'inline'; document.getElementById('1408.1363v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1408.1363v1-abstract-full" style="display: none;"> Various fields of science and engineering rely on linear algebra for large scale data analysis, modeling and simulation, machine learning, and other applied problems. Linear algebra computations often dominate the execution time of such applications. Meanwhile, experts in these domains typically lack the training or time required to develop efficient, high-performance implementations of linear algebra algorithms. In the Lighthouse project, we enable developers with varied backgrounds to readily discover and effectively apply the best available numerical software for their problems. We have developed a search-based expert system that combines expert knowledge, machine learning-based classification of existing numerical software collections, and automated code generation and optimization. Lighthouse provides a novel software engineering environment aimed at maximizing both developer productivity and application performance for dense and sparse linear algebra computations. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1408.1363v1-abstract-full').style.display = 'none'; document.getElementById('1408.1363v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 August, 2014; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2014. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">10 pages, 10 figures, 3 tables</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> G.4; H.4; D.2.8 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1309.1894">arXiv:1309.1894</a> <span> [<a href="https://arxiv.org/pdf/1309.1894">pdf</a>, <a href="https://arxiv.org/format/1309.1894">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> Software Autotuning for Sustainable Performance Portability </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Mametjanov%2C+A">Azamat Mametjanov</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1309.1894v1-abstract-short" style="display: inline;"> Scientific software applications are increasingly developed by large interdisciplinary teams operating on functional modules organized around a common software framework, which is capable of integrating new functional capabilities without modifying 
the core of the framework. In such environment, software correctness and modularity take precedence at the expense of code performance, which is an impo… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1309.1894v1-abstract-full').style.display = 'inline'; document.getElementById('1309.1894v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1309.1894v1-abstract-full" style="display: none;"> Scientific software applications are increasingly developed by large interdisciplinary teams operating on functional modules organized around a common software framework, which is capable of integrating new functional capabilities without modifying the core of the framework. In such environment, software correctness and modularity take precedence at the expense of code performance, which is an important concern during execution on supercomputing facilities, where the allocation of core-hours is a valuable resource. To alleviate the performance problems, we propose automated performance tuning (autotuning) of software to extract the maximum performance on a given hardware platform and to enable performance portability across heterogeneous hardware platforms. The resulting code remains generic without committing to a particular software stack and yet is compile-time specializable for maximal sustained performance. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1309.1894v1-abstract-full').style.display = 'none'; document.getElementById('1309.1894v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 7 September, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2013. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1309.1780">arXiv:1309.1780</a> <span> [<a href="https://arxiv.org/pdf/1309.1780">pdf</a>, <a href="https://arxiv.org/ps/1309.1780">ps</a>, <a href="https://arxiv.org/format/1309.1780">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computational Engineering, Finance, and Science">cs.CE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Mathematical Software">cs.MS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.5334/jors.aw">10.5334/jors.aw <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Software Abstractions and Methodologies for HPC Simulation Codes on Future Architectures </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Dubey%2C+A">A. Dubey</a>, <a href="/search/cs?searchtype=author&query=Brandt%2C+S">S. Brandt</a>, <a href="/search/cs?searchtype=author&query=Brower%2C+R">R. Brower</a>, <a href="/search/cs?searchtype=author&query=Giles%2C+M">M. Giles</a>, <a href="/search/cs?searchtype=author&query=Hovland%2C+P">P. Hovland</a>, <a href="/search/cs?searchtype=author&query=Lamb%2C+D+Q">D. Q. Lamb</a>, <a href="/search/cs?searchtype=author&query=Loffler%2C+F">F. Loffler</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">B. Norris</a>, <a href="/search/cs?searchtype=author&query=OShea%2C+B">B. OShea</a>, <a href="/search/cs?searchtype=author&query=Rebbi%2C+C">C. 
Rebbi</a>, <a href="/search/cs?searchtype=author&query=Snir%2C+M">M. Snir</a>, <a href="/search/cs?searchtype=author&query=Thakur%2C+R">R. Thakur</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1309.1780v1-abstract-short" style="display: inline;"> Large, complex, multi-scale, multi-physics simulation codes, running on high performance computing (HPC) platforms, have become essential to advancing science and engineering. These codes simulate multi-scale, multi-physics phenomena with unprecedented fidelity on petascale platforms, and are used by large communities. Continued ability of these codes to run on future platforms is as crucial to t… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1309.1780v1-abstract-full').style.display = 'inline'; document.getElementById('1309.1780v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1309.1780v1-abstract-full" style="display: none;"> Large, complex, multi-scale, multi-physics simulation codes, running on high performance computing (HPC) platforms, have become essential to advancing science and engineering. These codes simulate multi-scale, multi-physics phenomena with unprecedented fidelity on petascale platforms, and are used by large communities. Continued ability of these codes to run on future platforms is as crucial to their communities as continued improvements in instruments and facilities are to experimental scientists. However, the ability of code developers to do these things faces a serious challenge with the paradigm shift underway in platform architecture. The complexity and uncertainty of the future platforms makes it essential to approach this challenge cooperatively as a community. 
We need to develop common abstractions, frameworks, programming models and software development methodologies that can be applied across a broad range of complex simulation codes, and common software infrastructure to support them. In this position paper we express and discuss our belief that such an infrastructure is critical to the deployment of existing and new large, multi-scale, multi-physics codes on future HPC platforms. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1309.1780v1-abstract-full').style.display = 'none'; document.getElementById('1309.1780v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 September, 2013; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2013. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Position Paper</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1205.1098">arXiv:1205.1098</a> <span> [<a href="https://arxiv.org/pdf/1205.1098">pdf</a>, <a href="https://arxiv.org/format/1205.1098">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Mathematical Software">cs.MS</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> Reliable Generation of High-Performance Matrix Algebra </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Belter%2C+G">Geoffrey Belter</a>, <a 
href="/search/cs?searchtype=author&query=Jessup%2C+E">Elizabeth Jessup</a>, <a href="/search/cs?searchtype=author&query=Nelson%2C+T">Thomas Nelson</a>, <a href="/search/cs?searchtype=author&query=Norris%2C+B">Boyana Norris</a>, <a href="/search/cs?searchtype=author&query=Siek%2C+J+G">Jeremy G. Siek</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1205.1098v1-abstract-short" style="display: inline;"> Scientific programmers often turn to vendor-tuned Basic Linear Algebra Subprograms (BLAS) to obtain portable high performance. However, many numerical algorithms require several BLAS calls in sequence, and those successive calls result in suboptimal performance. The entire sequence needs to be optimized in concert. Instead of vendor-tuned BLAS, a programmer could start with source code in Fortran… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1205.1098v1-abstract-full').style.display = 'inline'; document.getElementById('1205.1098v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1205.1098v1-abstract-full" style="display: none;"> Scientific programmers often turn to vendor-tuned Basic Linear Algebra Subprograms (BLAS) to obtain portable high performance. However, many numerical algorithms require several BLAS calls in sequence, and those successive calls result in suboptimal performance. The entire sequence needs to be optimized in concert. Instead of vendor-tuned BLAS, a programmer could start with source code in Fortran or C (e.g., based on the Netlib BLAS) and use a state-of-the-art optimizing compiler. However, our experiments show that optimizing compilers often attain only one-quarter the performance of hand-optimized code. 
In this paper we present a domain-specific compiler for matrix algebra, the Build to Order BLAS (BTO), that reliably achieves high performance using a scalable search algorithm for choosing the best combination of loop fusion, array contraction, and multithreading for data parallelism. The BTO compiler generates code that is between 16% slower and 39% faster than hand-optimized code. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1205.1098v1-abstract-full').style.display = 'none'; document.getElementById('1205.1098v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 May, 2012; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2012. </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 
9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 
0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>