

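<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <!-- new favicon config and versions by realfavicongenerator.net -->
  <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png">
  <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png">
  <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png">
  <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest">
  <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b">
  <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico">
  <meta name="msapplication-TileColor" content="#b31b1b">
  <!-- NOTE(review): unlike every other icon URL here this path is relative, so it resolves against the
       search page's own URL rather than static.arxiv.org — confirm the file is actually served there. -->
  <meta name="msapplication-config" content="images/icons/browserconfig.xml">
  <meta name="theme-color" content="#b31b1b">
  <!-- end favicon config -->
  <title>Search | arXiv e-print repository</title>
  <!-- Only the jQuery tag below carries integrity/crossorigin. The scripts served from static.arxiv.org
       (Font Awesome, MathJax, notification.js, fieldset.js) run with full page privileges and are not
       integrity-checked. TODO(review): add integrity="<sha384-of-served-file>" crossorigin="anonymous"
       to each once the hashes are known — they are not on this page. -->
  <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css">
  <!-- MathJax configuration. Typesetting is opt-in: only elements whose class matches mathjax.* are
       processed (ignoreClass '.*' skips everything else). This inline script — like the inline onclick
       handlers on the abstract "More"/"Less" links in the result list — needs a nonce or hash before a
       strict script-src CSP can be applied to this page. -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      messageStyle: "none",
      extensions: ["tex2jax.js"],
      jax: ["input/TeX", "output/HTML-CSS"],
      tex2jax: {
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        displayMath: [ ['$$','$$'], ["\\[","\\]"] ],
        processEscapes: true,
        ignoreClass: '.*',
        processClass: 'mathjax.*'
      },
      TeX: {
        extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"],
        noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } }
      },
      "HTML-CSS": { availableFonts: ["TeX"] }
    });
  </script>
  <!-- Was protocol-relative (//static.arxiv.org/...); pinned to https so the script is never fetched
       over whatever scheme the page itself happened to load with. -->
  <script src="https://static.arxiv.org/MathJax-2.7.3/MathJax.js"></script>
  <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script>
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css">
  <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css">
  <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script>
  <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script>
  <style>
    /* NOTE(review): 'radio' is not an HTML element name, so this selector matches nothing;
       input#cf-customfield_11400 may be what was meant — confirm before changing. */
    radio#cf-customfield_11400 { display: none; }
  </style>
</head>
<body>
  <header>
    <a href="#main-container" class="is-sr-only">Skip to main content</a>
    <!-- contains Cornell logo and sponsor statement -->
    <div class="attribution level is-marginless" role="banner">
      <div class="level-left">
        <!-- aria-label="logo" was dropped from the image: it overrode the alt text, so assistive
             technology announced "logo" instead of "Cornell University". -->
        <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200"></a>
      </div>
      <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br>
        the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors.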
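<a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div>
    </div>
    <!-- contains arXiv identity and search bar -->
    <div class="identity level is-marginless">
      <div class="level-left">
        <div class="level-item">
          <!-- aria-label="logo" was dropped from the image: it overrode the alt text. The link keeps its
               own aria-label, which is what names the control for assistive technology. -->
          <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo">
            <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" alt="arxiv logo" width="85" style="width:85px;">
          </a>
        </div>
      </div>
      <div class="search-block level-right">
        <!-- Header mini-search: a GET to the search endpoint; the hidden "source" field tags where the
             query came from. -->
        <form class="level-item mini-search" method="GET" action="https://arxiv.org/search">
          <div class="field has-addons">
            <div class="control">
              <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms">
              <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p>
            </div>
            <div class="control">
              <div class="select is-small">
                <select name="searchtype" aria-label="Field to search">
                  <option value="all" selected>All fields</option>
                  <option value="title">Title</option>
                  <option value="author">Author</option>
                  <option value="abstract">Abstract</option>
                  <option value="comments">Comments</option>
                  <option value="journal_ref">Journal reference</option>
                  <option value="acm_class">ACM classification</option>
                  <option value="msc_class">MSC classification</option>
                  <option value="report_num">Report number</option>
                  <option value="paper_id">arXiv identifier</option>
                  <option value="doi">DOI</option>
                  <option value="orcid">ORCID</option>
                  <option value="author_id">arXiv author ID</option>
                  <option value="help">Help pages</option>
                  <option value="full_text">Full text</option>
                </select>
              </div>
            </div>
            <input type="hidden" name="source" value="header">
            <!-- type="submit" made explicit; it is the default, but a bare <button> in a form reads as an
                 accident rather than an intent. -->
            <button type="submit" class="button is-small is-cul-darker">Search</button>
          </div>
        </form>
      </div>
    </div>
    <!-- closes identity -->
    <div class="container">
      <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu">
        <a href="https://arxiv.org/login">Login</a>
      </div>
    </div>
  </header>
  <main class="container" id="main-container">
    <div class="level is-marginless">
      <div class="level-left">
        <h1 class="title is-clearfix"> Showing 1&ndash;26 of 26 results for author: <span class="mathjax">Costa, D E</span> </h1>
      </div>
      <div class="level-right is-hidden-mobile">
        <!-- feedback for mobile is moved to footer -->
        <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span>
      </div>
    </div>
    <div class="content">
      <!-- Main search form. role="search" replaces the original aria-role="search", which is not an
           attribute any user agent or assistive technology recognises, so the landmark was missing. -->
      <form method="GET" action="/search/cs" role="search">
        Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&amp;query=Costa%2C+D+E">Search in all archives.</a>
        <div class="field has-addons-tablet">
          <div class="control is-expanded">
            <label for="query" class="hidden-label">Search term or terms</label>
            <!-- value is the echoed user query; the template must attribute-escape it (&quot; &amp; &lt;).
                 The &amp; in the hrefs above shows escaping is applied to URLs, but the attribute path
                 cannot be verified from rendered output alone. -->
            <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Costa, D E">
          </div>
          <div class="select control is-medium">
            <label class="is-hidden" for="searchtype">Field</label>
            <select class="is-medium" id="searchtype" name="searchtype">
              <option value="all">All fields</option>
              <option value="title">Title</option>
              <option selected value="author">Author(s)</option>
              <option value="abstract">Abstract</option>
              <option value="comments">Comments</option>
              <option value="journal_ref">Journal reference</option>
              <option value="acm_class">ACM classification</option>
              <option value="msc_class">MSC classification</option>
              <option value="report_num">Report number</option>
              <option value="paper_id">arXiv identifier</option>
              <option value="doi">DOI</option>
              <option value="orcid">ORCID</option>
              <option value="license">License (URI)</option>
              <option value="author_id">arXiv author ID</option>
              <option value="help">Help pages</option>
              <option value="full_text">Full text</option>
            </select>
          </div>
          <div class="control">
            <button type="submit" class="button is-link is-medium">Search</button>
          </div>
        </div>
        <div class="field">
          <div class="control is-size-7">
            <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label>
            <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label>
          </div>
        </div>
        <div class="is-clearfix" style="height: 2.5em">
          <div class="is-pulled-right">
            <a href="/search/advanced?terms-0-term=Costa%2C+D+E&amp;terms-0-field=author&amp;size=50&amp;order=-announced_date_first">Advanced Search</a>
          </div>
        </div>
        <input type="hidden" name="order" value="-announced_date_first">
        <input type="hidden" name="size" value="50">
      </form>
      <div class="level breathe-horizontal">
        <div class="level-left">
          <!-- Page-size / sort form. The hidden block carries copies of the current query, field and
               abstracts setting so they survive a re-sort. -->
          <form method="GET" action="/search/">
            <!-- NOTE(review): id="searchtype", id="query", id="abstracts-0"/"abstracts-1" (and the for=
                 references) repeat the ids of the visible form above, so the document has duplicate ids
                 and getElementById() resolves to the visible controls, not these. Left unchanged because
                 fieldset.js (loaded in head) may select on them — confirm before renaming, e.g. with a
                 -hidden suffix. -->
            <div style="display: none;">
              <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select>
              <input id="query" name="query" type="text" value="Costa, D E">
              <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul>
            </div>
            <div class="box field is-grouped is-grouped-multiline level-item">
              <div class="control">
                <span class="select is-small">
                  <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select>
                </span>
                <label for="size">results per page</label>.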
</div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2503.16332">arXiv:2503.16332</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2503.16332">pdf</a>, <a href="https://arxiv.org/format/2503.16332">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3680256.3721973">10.1145/3680256.3721973 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Dataset of Performance Measurements and Alerts from Mozilla (Data Artifact) </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Besbes%2C+M+B">Mohamed Bilel Besbes</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Mujahid%2C+S">Suhaib Mujahid</a>, <a href="/search/cs?searchtype=author&amp;query=Mierzwinski%2C+G">Gregory Mierzwinski</a>, <a 
href="/search/cs?searchtype=author&amp;query=Castelluccio%2C+M">Marco Castelluccio</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2503.16332v1-abstract-short" style="display: inline;"> Performance regressions in software systems can lead to significant financial losses and degraded user satisfaction, making their early detection and mitigation critical. Despite the importance of practices that capture performance regressions early, there is a lack of publicly available datasets that comprehensively capture real-world performance measurements, expert-validated alerts, and associa&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.16332v1-abstract-full').style.display = 'inline'; document.getElementById('2503.16332v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2503.16332v1-abstract-full" style="display: none;"> Performance regressions in software systems can lead to significant financial losses and degraded user satisfaction, making their early detection and mitigation critical. Despite the importance of practices that capture performance regressions early, there is a lack of publicly available datasets that comprehensively capture real-world performance measurements, expert-validated alerts, and associated metadata such as bugs and testing conditions. To address this gap, we introduce a unique dataset to support various research studies in performance engineering, anomaly detection, and machine learning. This dataset was collected from Mozilla Firefox&#39;s performance testing infrastructure and comprises 5,655 performance time series, 17,989 performance alerts, and detailed annotations of resulting bugs collected from May 2023 to May 2024. 
By publishing this dataset, we provide researchers with an invaluable resource for studying performance trends, developing novel change point detection methods, and advancing performance regression analysis across diverse platforms and testing environments. The dataset is available at https://doi.org/10.5281/zenodo.14642238 <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2503.16332v1-abstract-full').style.display = 'none'; document.getElementById('2503.16332v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 March, 2025; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> March 2025. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.06948">arXiv:2412.06948</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.06948">pdf</a>, <a href="https://arxiv.org/format/2412.06948">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Opportunities and Security Risks of Technical Leverage: A Replication Study on the NPM Ecosystem </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Samaana%2C+H">Haya Samaana</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Abdellatif%2C+A">Ahmad Abdellatif</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.06948v1-abstract-short" style="display: inline;"> To 
comply with high productivity demands, software developers reuse free open-source software (FOSS) code to avoid reinventing the wheel when incorporating software features. The reliance on FOSS reuse has been shown to improve productivity and the quality of delivered software; however, reusing FOSS comes at the risk of exposing software projects to public vulnerabilities. Massacci and Pashchenko&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06948v1-abstract-full').style.display = 'inline'; document.getElementById('2412.06948v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.06948v1-abstract-full" style="display: none;"> To comply with high productivity demands, software developers reuse free open-source software (FOSS) code to avoid reinventing the wheel when incorporating software features. The reliance on FOSS reuse has been shown to improve productivity and the quality of delivered software; however, reusing FOSS comes at the risk of exposing software projects to public vulnerabilities. Massacci and Pashchenko have explored this trade-off in the Java ecosystem through the lens of technical leverage: the ratio of code borrowed from FOSS over the code developed by project maintainers. In this paper, we replicate the work of Massacci and Pashchenko and we expand the analysis to include level-1 transitive dependencies to study technical leverage in the fastest-growing NPM ecosystem. We investigated 14,042 NPM library releases and found that both opportunities and risks of technical leverage are magnified in the NPM ecosystem. Small-medium libraries leverage 2.5x more code from FOSS than their code, while large libraries leverage only 3\% of FOSS code in their projects. Our models indicate that technical leverage shortens the release cycle for small-medium libraries. 
However, the risk of vulnerability exposure is 4-7x higher for libraries with high technical leverage. We also expanded our replication study to include the first level of transitive dependencies, and show that the results still hold, albeit with significant changes in the magnitude of both opportunities and risks of technical leverage. Our results indicate the extremes of opportunities and risks in NPM, where high technical leverage enables fast releases but comes at the cost of security risks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.06948v1-abstract-full').style.display = 'none'; document.getElementById('2412.06948v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Manuscript accepted for publication in Empirical Software Engineering (EMSE)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.05259">arXiv:2412.05259</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.05259">pdf</a>, <a href="https://arxiv.org/format/2412.05259">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> A Machine Learning-Based Approach For Detecting Malicious PyPI Packages </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Samaana%2C+H">Haya Samaana</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, 
<a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a>, <a href="/search/cs?searchtype=author&amp;query=Abdellatif%2C+A">Ahmad Abdellatif</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.05259v1-abstract-short" style="display: inline;"> Background. In modern software development, the use of external libraries and packages is increasingly prevalent, streamlining the software development process and enabling developers to deploy feature-rich systems with little coding. While this reliance on reusing code offers substantial benefits, it also introduces serious risks for deployed software in the form of malicious packages - harmful a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.05259v1-abstract-full').style.display = 'inline'; document.getElementById('2412.05259v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.05259v1-abstract-full" style="display: none;"> Background. In modern software development, the use of external libraries and packages is increasingly prevalent, streamlining the software development process and enabling developers to deploy feature-rich systems with little coding. While this reliance on reusing code offers substantial benefits, it also introduces serious risks for deployed software in the form of malicious packages - harmful and vulnerable code disguised as useful libraries. Aims. Popular ecosystems, such as PyPI, receive thousands of new package contributions every week, and distinguishing safe contributions from harmful ones presents a significant challenge. There is a dire need for reliable methods to detect and address the presence of malicious packages in these environments. Method. 
To address these challenges, we propose a data-driven approach that uses machine learning and static analysis to examine the package&#39;s metadata, code, files, and textual characteristics to identify malicious packages. Results. In evaluations conducted within the PyPI ecosystem, we achieved an F1-measure of 0.94 for identifying malicious packages using a stacking ensemble classifier. Conclusions. This tool can be seamlessly integrated into package vetting pipelines and has the capability to flag entire packages, not just malicious function calls. This enhancement strengthens security measures and reduces the manual workload for developers and registry maintainers, thereby contributing to the overall integrity of the ecosystem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.05259v1-abstract-full').style.display = 'none'; document.getElementById('2412.05259v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 6 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2412.02789">arXiv:2412.02789</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2412.02789">pdf</a>, <a href="https://arxiv.org/format/2412.02789">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Exploring the Potential of Llama Models in Automated Code Refinement: A Replication Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Caumartin%2C+G">Genevieve Caumartin</a>, <a href="/search/cs?searchtype=author&amp;query=Qin%2C+Q">Qiaolin Qin</a>, <a href="/search/cs?searchtype=author&amp;query=Chatragadda%2C+S">Sharon Chatragadda</a>, <a href="/search/cs?searchtype=author&amp;query=Panjrolia%2C+J">Janmitsinh Panjrolia</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Heng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2412.02789v1-abstract-short" style="display: inline;"> Code reviews are an integral part of software development and have been recognized as a crucial practice for minimizing bugs and favouring higher code quality. They serve as an important checkpoint before committing code and play an essential role in knowledge transfer between developers. However, code reviews can be time-consuming and can stall the development of large software projects. 
In a r&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.02789v1-abstract-full').style.display = 'inline'; document.getElementById('2412.02789v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2412.02789v1-abstract-full" style="display: none;"> Code reviews are an integral part of software development and have been recognized as a crucial practice for minimizing bugs and favouring higher code quality. They serve as an important checkpoint before committing code and play an essential role in knowledge transfer between developers. However, code reviews can be time-consuming and can stall the development of large software projects. In a recent study, Guo et al. assessed how ChatGPT3.5 can help the code review process. They evaluated the effectiveness of ChatGPT in automating the code refinement tasks, where developers recommend small changes in the submitted code. While Guo et al.&#39;s study showed promising results, proprietary models like ChatGPT pose risks to data privacy and incur extra costs for software projects. In this study, we explore alternatives to ChatGPT in code refinement tasks by including two open-source, smaller-scale large language models: CodeLlama and Llama 2 (7B parameters). Our results show that, if properly tuned, the Llama models, particularly CodeLlama, can achieve reasonable performance, often comparable to ChatGPT in automated code refinement. However, not all code refinement tasks are equally successful: tasks that require changing existing code (e.g., refactoring) are more manageable for models to automate than tasks that demand new code. Our study highlights the potential of open-source models for code refinement, offering cost-effective, privacy-conscious solutions for real-world software development. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2412.02789v1-abstract-full').style.display = 'none'; document.getElementById('2412.02789v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 December, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2408.08148">arXiv:2408.08148</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2408.08148">pdf</a>, <a href="https://arxiv.org/format/2408.08148">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Early Detection of Performance Regressions by Bridging Local Performance Data and Architectural Models </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Liao%2C+L">Lizhi Liao</a>, <a href="/search/cs?searchtype=author&amp;query=Eismann%2C+S">Simon Eismann</a>, <a href="/search/cs?searchtype=author&amp;query=Li%2C+H">Heng Li</a>, <a href="/search/cs?searchtype=author&amp;query=Bezemer%2C+C">Cor-Paul Bezemer</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=van+Hoorn%2C+A">Andre van Hoorn</a>, <a href="/search/cs?searchtype=author&amp;query=Shang%2C+W">Weiyi Shang</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2408.08148v1-abstract-short" style="display: inline;"> During software development, developers often make numerous modifications to the software to 
address existing issues or implement new features. However, certain changes may inadvertently have a detrimental impact on the overall system performance. To ensure that the performance of new software releases does not degrade, existing practices rely on system-level performance testing, such as load test&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08148v1-abstract-full').style.display = 'inline'; document.getElementById('2408.08148v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2408.08148v1-abstract-full" style="display: none;"> During software development, developers often make numerous modifications to the software to address existing issues or implement new features. However, certain changes may inadvertently have a detrimental impact on the overall system performance. To ensure that the performance of new software releases does not degrade, existing practices rely on system-level performance testing, such as load testing, or component-level performance testing to detect performance regressions. However, performance testing for the entire system is often expensive and time-consuming, posing challenges to adapting to the rapid release cycles common in modern DevOps practices. System-level performance testing cannot be conducted until the system is fully built and deployed. On the other hand, component-level testing focuses on isolated components, neglecting overall system performance and the impact of system workloads. In this paper, we propose a novel approach to early detection of performance regressions by bridging the local performance data generated by component-level testing and the system-level architectural models. Our approach uses local performance data to identify deviations at the component level, and then propagate these deviations to the architectural model. 
We then use the architectural model to predict regressions in the performance of the overall system. We evaluate our approach on two open-source benchmark systems and show that it can effectively detect end-to-end system performance regressions from local performance deviations with different intensities and under various system workloads. More importantly, our approach can detect regressions as early as in the development phase, in contrast to existing approaches that require the system to be fully built and deployed. Our approach is lightweight and can complement traditional system performance testing when testing resources are scarce. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2408.08148v1-abstract-full').style.display = 'none'; document.getElementById('2408.08148v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 15 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2407.11955">arXiv:2407.11955</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2407.11955">pdf</a>, <a href="https://arxiv.org/format/2407.11955">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> A Transformer-based Approach for Augmenting Software Engineering Chatbots Datasets </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Abdellatif%2C+A">Ahmad Abdellatif</a>, <a href="/search/cs?searchtype=author&amp;query=Badran%2C+K">Khaled Badran</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2407.11955v1-abstract-short" style="display: inline;"> Background: The adoption of chatbots into software development tasks has become increasingly popular among practitioners, driven by the advantages of cost reduction and acceleration of the software development process. Chatbots understand users&#39; queries through the Natural Language Understanding component (NLU). 
To yield reasonable performance, NLUs have to be trained with extensive, high-quality&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11955v1-abstract-full').style.display = 'inline'; document.getElementById('2407.11955v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2407.11955v1-abstract-full" style="display: none;"> Background: The adoption of chatbots into software development tasks has become increasingly popular among practitioners, driven by the advantages of cost reduction and acceleration of the software development process. Chatbots understand users&#39; queries through the Natural Language Understanding component (NLU). To yield reasonable performance, NLUs have to be trained with extensive, high-quality datasets, that express a multitude of ways users may interact with chatbots. However, previous studies show that creating a high-quality training dataset for software engineering chatbots is expensive in terms of both resources and time. Aims: Therefore, in this paper, we present an automated transformer-based approach to augment software engineering chatbot datasets. Method: Our approach combines traditional natural language processing techniques with the BART transformer to augment a dataset by generating queries through synonym replacement and paraphrasing. We evaluate the impact of using the augmentation approach on the Rasa NLU&#39;s performance using three software engineering datasets. Results: Overall, the augmentation approach shows promising results in improving the Rasa&#39;s performance, augmenting queries with varying sentence structures while preserving their original semantics. Furthermore, it increases Rasa&#39;s confidence in its intent classification for the correctly classified intents. 
Conclusions: We believe that our study helps practitioners improve the performance of their chatbots and guides future research to propose augmentation techniques for SE chatbots. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2407.11955v1-abstract-full').style.display = 'none'; document.getElementById('2407.11955v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 July, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.16340">arXiv:2401.16340</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.16340">pdf</a>, <a href="https://arxiv.org/format/2401.16340">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> The role of library versions in Developer-ChatGPT conversations </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Raj%2C+R">Rachna Raj</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.16340v1-abstract-short" style="display: inline;"> The latest breakthroughs in large language models (LLM) have empowered software development tools, such as ChatGPT, to aid developers in complex tasks. Developers use ChatGPT to write code, review code changes, and even debug their programs. In these interactions, ChatGPT often recommends code snippets that depend on external libraries. 
However, code from libraries changes over time, invalidating&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.16340v1-abstract-full').style.display = 'inline'; document.getElementById('2401.16340v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.16340v1-abstract-full" style="display: none;"> The latest breakthroughs in large language models (LLM) have empowered software development tools, such as ChatGPT, to aid developers in complex tasks. Developers use ChatGPT to write code, review code changes, and even debug their programs. In these interactions, ChatGPT often recommends code snippets that depend on external libraries. However, code from libraries changes over time, invalidating a once-correct code snippet and making it difficult to reuse recommended code. In this study, we analyze DevGPT, a dataset of more than 4,000 Developer-ChatGPT interactions, to understand the role of library versions in code-related conversations. We quantify how often library version constraints are mentioned in code-related conversations and when ChatGPT recommends the installation of specific libraries. Our findings show that, although ChatGPT constantly recommends and analyzes code with external dependencies, library version constraints only appear in 9% of the conversations. In the majority of conversations, the version constraints are prompted by users (as opposed to being specified by ChatGPT) as a method for receiving better quality responses. Moreover, we study how library version constraints are used in the conversation through qualitative methods, identifying several potential problems that warrant further research. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.16340v1-abstract-full').style.display = 'none'; document.getElementById('2401.16340v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2401.13667">arXiv:2401.13667</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2401.13667">pdf</a>, <a href="https://arxiv.org/ps/2401.13667">ps</a>, <a href="https://arxiv.org/format/2401.13667">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Predicting the Impact of Crashes Across Release Channels </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mujahid%2C+S">Suhaib Mujahid</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Castelluccio%2C+M">Marco Castelluccio</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2401.13667v1-abstract-short" style="display: inline;"> Software maintenance faces a persistent challenge with crash bugs, especially across diverse release channels catering to distinct user bases. Nightly builds, favoured by enthusiasts, often reveal crashes that are cheaper to fix but may differ significantly from those in stable releases. 
In this paper, we emphasize the need for a data-driven solution to predict the impact of crashes happening on n&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13667v1-abstract-full').style.display = 'inline'; document.getElementById('2401.13667v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2401.13667v1-abstract-full" style="display: none;"> Software maintenance faces a persistent challenge with crash bugs, especially across diverse release channels catering to distinct user bases. Nightly builds, favoured by enthusiasts, often reveal crashes that are cheaper to fix but may differ significantly from those in stable releases. In this paper, we emphasize the need for a data-driven solution to predict the impact of crashes happening on nightly channels once they are released to stable channels. We also list the challenges that need to be considered when approaching this problem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2401.13667v1-abstract-full').style.display = 'none'; document.getElementById('2401.13667v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> January 2024. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2311.07786">arXiv:2311.07786</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2311.07786">pdf</a>, <a href="https://arxiv.org/format/2311.07786">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Predicting the First Response Latency of Maintainers and Contributors in Pull Requests </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khatoonabadi%2C+S">SayedHassan Khatoonabadi</a>, <a href="/search/cs?searchtype=author&amp;query=Abdellatif%2C+A">Ahmad Abdellatif</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2311.07786v2-abstract-short" style="display: inline;"> The success of a Pull Request (PR) depends on the responsiveness of the maintainers and the contributor during the review process. Being aware of the expected waiting times can lead to better interactions and managed expectations for both the maintainers and the contributor. 
In this paper, we propose a machine-learning approach to predict the first response latency of the maintainers following the&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.07786v2-abstract-full').style.display = 'inline'; document.getElementById('2311.07786v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2311.07786v2-abstract-full" style="display: none;"> The success of a Pull Request (PR) depends on the responsiveness of the maintainers and the contributor during the review process. Being aware of the expected waiting times can lead to better interactions and managed expectations for both the maintainers and the contributor. In this paper, we propose a machine-learning approach to predict the first response latency of the maintainers following the submission of a PR, and the first response latency of the contributor after receiving the first response from the maintainers. We curate a dataset of 20 large and popular open-source projects on GitHub and extract 21 features to characterize projects, contributors, PRs, and review processes. Using these features, we then evaluate seven types of classifiers to identify the best-performing models. We also conduct permutation feature importance and SHAP analyses to understand the importance and the impact of different features on the predicted response latencies. We find that our CatBoost models are the most effective for predicting the first response latencies of both maintainers and contributors. We also observe that PRs submitted earlier in the week, containing an average number of commits, and with concise descriptions are more likely to receive faster first responses from the maintainers. 
Similarly, PRs with a lower first response latency from maintainers, that received the first response of maintainers earlier in the week, and containing an average number of commits tend to receive faster first responses from the contributors. Additionally, contributors with a higher acceptance rate and a history of timely responses in the project are likely to both obtain and provide faster first responses. Moreover, we show the effectiveness of our approach in a cross-project setting. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2311.07786v2-abstract-full').style.display = 'none'; document.getElementById('2311.07786v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 12 August, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 13 November, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Manuscript accepted for publication in IEEE Transactions on Software Engineering (TSE)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.07847">arXiv:2310.07847</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2310.07847">pdf</a>, <a href="https://arxiv.org/format/2310.07847">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Dependency Practices for Vulnerability Mitigation </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jafari%2C+A+J">Abbas Javan Jafari</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Abdellatif%2C+A">Ahmad Abdellatif</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.07847v1-abstract-short" style="display: inline;"> Relying on dependency packages accelerates software development, but it also increases the exposure to security vulnerabilities that may be present in dependencies. While developers have full control over which dependency packages (and which version) they use, they have no control over the dependencies of their dependencies. 
Such transitive dependencies, which often amount to a greater number than&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.07847v1-abstract-full').style.display = 'inline'; document.getElementById('2310.07847v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.07847v1-abstract-full" style="display: none;"> Relying on dependency packages accelerates software development, but it also increases the exposure to security vulnerabilities that may be present in dependencies. While developers have full control over which dependency packages (and which version) they use, they have no control over the dependencies of their dependencies. Such transitive dependencies, which often amount to a greater number than direct dependencies, can become infected with vulnerabilities and put software projects at risk. To mitigate this risk, practitioners need to select dependencies that respond quickly to vulnerabilities to prevent the propagation of vulnerable code to their project. To identify such dependencies, we analyze more than 450 vulnerabilities in the npm ecosystem to understand why dependent packages remain vulnerable. We identify over 200,000 npm packages that are infected through their dependencies and use 9 features to build a prediction model that identifies packages that quickly adopt the vulnerability fix and prevent further propagation of vulnerabilities. We also study the relationship between these features and the response speed of vulnerable packages. We complement our work with a practitioner survey to understand the applicability of our findings. Developers can incorporate our findings into their dependency management practices to mitigate the impact of vulnerabilities from their dependency supply chain. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.07847v1-abstract-full').style.display = 'none'; document.getElementById('2310.07847v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.08667">arXiv:2308.08667</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2308.08667">pdf</a>, <a href="https://arxiv.org/format/2308.08667">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Where to Go Now? Finding Alternatives for Declining Packages in the npm Ecosystem </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mujahid%2C+S">Suhaib Mujahid</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Abdalkareem%2C+R">Rabe Abdalkareem</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.08667v1-abstract-short" style="display: inline;"> Software ecosystems (e.g., npm, PyPI) are the backbone of modern software developments. Developers add new packages to ecosystems every day to solve new problems or provide alternative solutions, causing obsolete packages to decline in their importance to the community. 
Packages in decline are reused less over time and may become less frequently maintained. Thus, developers usually migrate their de&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.08667v1-abstract-full').style.display = 'inline'; document.getElementById('2308.08667v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.08667v1-abstract-full" style="display: none;"> Software ecosystems (e.g., npm, PyPI) are the backbone of modern software development. Developers add new packages to ecosystems every day to solve new problems or provide alternative solutions, causing obsolete packages to decline in their importance to the community. Packages in decline are reused less over time and may become less frequently maintained. Thus, developers usually migrate their dependencies to better alternatives. Replacing packages in decline with better alternatives requires time and effort by developers to identify packages that need to be replaced, find the alternatives, assess migration benefits, and finally, perform the migration. This paper proposes an approach that automatically identifies packages that need to be replaced and finds their alternatives supported with real-world examples of open source projects performing the suggested migrations. At its core, our approach relies on the dependency migration patterns performed in the ecosystem to suggest migrations to other developers. We evaluated our approach on the npm ecosystem and found that 96% of the suggested alternatives are accurate. Furthermore, by surveying expert JavaScript developers, 67% of them indicate that they will use our suggested alternative packages in their future projects. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.08667v1-abstract-full').style.display = 'none'; document.getElementById('2308.08667v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 16 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2307.13777">arXiv:2307.13777</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2307.13777">pdf</a>, <a href="https://arxiv.org/format/2307.13777">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> </div> </div> <p class="title is-5 mathjax"> An Empirical Study on Bugs Inside PyTorch: A Replication Study </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ho%2C+S+C+Y">Sharon Chee Yin Ho</a>, <a href="/search/cs?searchtype=author&amp;query=Majdinasab%2C+V">Vahid Majdinasab</a>, <a href="/search/cs?searchtype=author&amp;query=Islam%2C+M">Mohayeminul Islam</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a>, <a href="/search/cs?searchtype=author&amp;query=Khomh%2C+F">Foutse Khomh</a>, <a href="/search/cs?searchtype=author&amp;query=Nadi%2C+S">Sarah Nadi</a>, <a href="/search/cs?searchtype=author&amp;query=Raza%2C+M">Muhammad Raza</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark 
mathjax" id="2307.13777v2-abstract-short" style="display: inline;"> Software systems are increasingly relying on deep learning components, due to their remarkable capability of identifying complex data patterns and powering intelligent behaviour. A core enabler of this change in software development is the availability of easy-to-use deep learning libraries. Libraries like PyTorch and TensorFlow empower a large variety of intelligent systems, offering a multitude&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.13777v2-abstract-full').style.display = 'inline'; document.getElementById('2307.13777v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2307.13777v2-abstract-full" style="display: none;"> Software systems are increasingly relying on deep learning components, due to their remarkable capability of identifying complex data patterns and powering intelligent behaviour. A core enabler of this change in software development is the availability of easy-to-use deep learning libraries. Libraries like PyTorch and TensorFlow empower a large variety of intelligent systems, offering a multitude of algorithms and configuration options, applicable to numerous domains of systems. However, bugs in those popular deep learning libraries also may have dire consequences for the quality of systems they enable; thus, it is important to understand how bugs are identified and fixed in those libraries. Inspired by a study of Jia et al., which investigates the bug identification and fixing process at TensorFlow, we characterize bugs in the PyTorch library, a very popular deep learning framework. We investigate the causes and symptoms of bugs identified during PyTorch&#39;s development, and assess their locality within the project, and extract patterns of bug fixes. 
Our results highlight that PyTorch bugs are more like traditional software project bugs than bugs related to deep learning characteristics. Finally, we also compare our results with the study on TensorFlow, highlighting similarities and differences across the bug identification and fixing process. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2307.13777v2-abstract-full').style.display = 'none'; document.getElementById('2307.13777v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 1 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 25 July, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.18150">arXiv:2305.18150</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.18150">pdf</a>, <a href="https://arxiv.org/format/2305.18150">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3624739">10.1145/3624739 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Understanding the Helpfulness of Stale Bot for Pull-based Development: An Empirical Study of 20 Large Open-Source Projects 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khatoonabadi%2C+S">SayedHassan Khatoonabadi</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Mujahid%2C+S">Suhaib Mujahid</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.18150v1-abstract-short" style="display: inline;"> Pull Requests (PRs) that are neither progressed nor resolved clutter the list of PRs, making it difficult for the maintainers to manage and prioritize unresolved PRs. To automatically track, follow up, and close such inactive PRs, Stale bot was introduced by GitHub. Despite its increasing adoption, there are ongoing debates on whether using Stale bot alleviates or exacerbates the problem of inacti&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.18150v1-abstract-full').style.display = 'inline'; document.getElementById('2305.18150v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.18150v1-abstract-full" style="display: none;"> Pull Requests (PRs) that are neither progressed nor resolved clutter the list of PRs, making it difficult for the maintainers to manage and prioritize unresolved PRs. To automatically track, follow up, and close such inactive PRs, Stale bot was introduced by GitHub. Despite its increasing adoption, there are ongoing debates on whether using Stale bot alleviates or exacerbates the problem of inactive PRs. To better understand if and how Stale bot helps projects in their pull-based development workflow, we perform an empirical study of 20 large and popular open-source projects. 
We find that Stale bot can help deal with a backlog of unresolved PRs as the projects closed more PRs within the first few months of adoption. Moreover, Stale bot can help improve the efficiency of the PR review process as the projects reviewed PRs that ended up merged and resolved PRs that ended up closed faster after the adoption. However, Stale bot can also negatively affect the contributors as the projects experienced a considerable decrease in their number of active contributors after the adoption. Therefore, relying solely on Stale bot to deal with inactive PRs may lead to decreased community engagement and an increased probability of contributor abandonment. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.18150v1-abstract-full').style.display = 'none'; document.getElementById('2305.18150v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Manuscript submitted to ACM Transactions on Software Engineering and Methodology</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.15675">arXiv:2305.15675</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2305.15675">pdf</a>, <a href="https://arxiv.org/format/2305.15675">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Dependency Update Strategies and Package Characteristics </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jafari%2C+A+J">Abbas Javan Jafari</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a>, <a href="/search/cs?searchtype=author&amp;query=Abdalkareem%2C+R">Rabe Abdalkareem</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.15675v1-abstract-short" style="display: inline;"> Managing project dependencies is a key maintenance issue in software development. Developers need to choose an update strategy that allows them to receive important updates and fixes while protecting them from breaking changes. Semantic Versioning was proposed to address this dilemma but many have opted for more restrictive or permissive alternatives. 
This empirical study explores the association&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15675v1-abstract-full').style.display = 'inline'; document.getElementById('2305.15675v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.15675v1-abstract-full" style="display: none;"> Managing project dependencies is a key maintenance issue in software development. Developers need to choose an update strategy that allows them to receive important updates and fixes while protecting them from breaking changes. Semantic Versioning was proposed to address this dilemma but many have opted for more restrictive or permissive alternatives. This empirical study explores the association between package characteristics and the dependency update strategy selected by its dependents to understand how developers select and change their update strategies. We study over 112,000 npm packages and use 19 characteristics to build a prediction model that identifies the common dependency update strategy for each package. Our model achieves a minimum improvement of 72% over the baselines and is much better aligned with community decisions than the npm default strategy. We investigate how different package characteristics can influence the predicted update strategy and find dependent count, age and release status to be the most influential features. We complement the work with qualitative analyses of 160 packages to investigate the evolution of update strategies. While the common update strategy remains consistent for many packages, certain events such as the release of the 1.0.0 version or breaking changes influence the selected update strategy over time. 
<a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.15675v1-abstract-full').style.display = 'none'; document.getElementById('2305.15675v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 24 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.00042">arXiv:2302.00042</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2302.00042">pdf</a>, <a href="https://arxiv.org/format/2302.00042">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Diversity Awareness in Software Engineering Participant Research </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Dutta%2C+R">Riya Dutta</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a>, <a href="/search/cs?searchtype=author&amp;query=Tajmel%2C+T">Tanja Tajmel</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.00042v1-abstract-short" style="display: inline;"> Diversity and inclusion are necessary prerequisites for shaping technological innovation that benefits society as a whole. A common indicator of diversity consideration is the representation of different social groups among software engineering (SE) researchers, developers, and students. 
However, this does not necessarily entail that diversity is considered in the SE research itself. In our stud&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.00042v1-abstract-full').style.display = 'inline'; document.getElementById('2302.00042v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.00042v1-abstract-full" style="display: none;"> Diversity and inclusion are necessary prerequisites for shaping technological innovation that benefits society as a whole. A common indicator of diversity consideration is the representation of different social groups among software engineering (SE) researchers, developers, and students. However, this does not necessarily entail that diversity is considered in the SE research itself. In our study, we examine how diversity is embedded in SE research, particularly research that involves participant studies. To this end, we have selected 79 research papers containing 105 participant studies spanning three years of ICSE technical tracks. Using a content analytical approach, we identified how SE researchers report the various diversity categories of their study participants and investigated: 1) the extent to which participants are described, 2) what diversity categories are commonly reported, and 3) the function diversity serves in the SE studies. We identified 12 different diversity categories reported in SE participant studies. Our results demonstrate that even though most SE studies report on the diversity of participants, SE research often emphasizes professional diversity data, such as occupation and work experience, over social diversity data, such as gender or location of the participants. Furthermore, our results show that participant diversity is seldom analyzed or reflected upon when SE researchers discuss their study results, outcome or limitations. 
To help researchers self-assess their study diversity awareness, we propose a diversity awareness model and guidelines that SE researchers can apply to their research. With this study, we hope to shed light on a new approach to tackling the diversity and inclusion crisis in the SE field. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.00042v1-abstract-full').style.display = 'none'; document.getElementById('2302.00042v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 January, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2212.02614">arXiv:2212.02614</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2212.02614">pdf</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computers and Society">cs.CY</span> </div> </div> <p class="title is-5 mathjax"> Can Ensembling Pre-processing Algorithms Lead to Better Machine Learning Fairness? 
</p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Badran%2C+K">Khaled Badran</a>, <a href="/search/cs?searchtype=author&amp;query=C%C3%B4t%C3%A9%2C+P">Pierre-Olivier Côté</a>, <a href="/search/cs?searchtype=author&amp;query=Kolopanis%2C+A">Amanda Kolopanis</a>, <a href="/search/cs?searchtype=author&amp;query=Bouchoucha%2C+R">Rached Bouchoucha</a>, <a href="/search/cs?searchtype=author&amp;query=Collante%2C+A">Antonio Collante</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a>, <a href="/search/cs?searchtype=author&amp;query=Khomh%2C+F">Foutse Khomh</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2212.02614v1-abstract-short" style="display: inline;"> As machine learning (ML) systems get adopted in more critical areas, it has become increasingly crucial to address the bias that could occur in these systems. Several fairness pre-processing algorithms are available to alleviate implicit biases during model training. These algorithms employ different concepts of fairness, often leading to conflicting strategies with consequential trade-offs betwee&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.02614v1-abstract-full').style.display = 'inline'; document.getElementById('2212.02614v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2212.02614v1-abstract-full" style="display: none;"> As machine learning (ML) systems get adopted in more critical areas, it has become increasingly crucial to address the bias that could occur in these systems. Several fairness pre-processing algorithms are available to alleviate implicit biases during model training. 
These algorithms employ different concepts of fairness, often leading to conflicting strategies with consequential trade-offs between fairness and accuracy. In this work, we evaluate three popular fairness pre-processing algorithms and investigate the potential for combining all algorithms into a more robust pre-processing ensemble. We report on lessons learned that can help practitioners better select fairness algorithms for their models. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2212.02614v1-abstract-full').style.display = 'none'; document.getElementById('2212.02614v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 5 December, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2022. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2208.05860">arXiv:2208.05860</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2208.05860">pdf</a>, <a href="https://arxiv.org/format/2208.05860">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Achievement Unlocked: A Case Study on Gamifying DevOps Practices in Industry </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Ayoup%2C+P">Patrick Ayoup</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2208.05860v1-abstract-short" style="display: 
inline;"> Gamification is the use of game elements such as points, leaderboards, and badges in a non-game context to encourage a desired behavior from individuals interacting with an environment. Recently, gamification has found its way into software engineering contexts as a means to promote certain activities to practitioners. Previous studies investigated the use of gamification to promote the adoption o&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.05860v1-abstract-full').style.display = 'inline'; document.getElementById('2208.05860v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2208.05860v1-abstract-full" style="display: none;"> Gamification is the use of game elements such as points, leaderboards, and badges in a non-game context to encourage a desired behavior from individuals interacting with an environment. Recently, gamification has found its way into software engineering contexts as a means to promote certain activities to practitioners. Previous studies investigated the use of gamification to promote the adoption of a variety of tools and practices, however, these studies were either performed in an educational environment or in small to medium-sized teams of developers in the industry. We performed a large-scale mixed-methods study on the effects of badge-based gamification in promoting the adoption of DevOps practices in a very large company and evaluated how practice adoption is associated with changes in key delivery, quality, and throughput metrics of 333 software projects. We observed an accelerated adoption of some gamified DevOps practices by at least 60%, with increased adoption rates up to 6x. 
We found mixed results when associating badge adoption and metric changes: teams that earned testing badges showed an increase in bug fixing commits but output fewer commits and pull requests; teams that earned code review and quality tooling badges exhibited faster delivery metrics. Finally, our empirical study was supplemented by a survey with 45 developers where 73% of respondents found badges to be helpful for learning about and adopting new standardized practices. Our results contribute to the rich knowledge on gamification with a unique and important perspective from real industry practitioners. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2208.05860v1-abstract-full').style.display = 'none'; document.getElementById('2208.05860v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2207.14711">arXiv:2207.14711</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2207.14711">pdf</a>, <a href="https://arxiv.org/format/2207.14711">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Not All Dependencies are Equal: An Empirical Study on Production Dependencies in NPM </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Latendresse%2C+J">Jasmine Latendresse</a>, <a href="/search/cs?searchtype=author&amp;query=Mujahid%2C+S">Suhaib Mujahid</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2207.14711v2-abstract-short" style="display: inline;"> Modern software systems are often built by leveraging code written by others in the form of libraries and packages to accelerate their development. While there are many benefits to using third-party packages, software projects often become dependent on a large number of software packages. 
Consequently, developers are faced with the difficult challenge of maintaining their project dependencies by k&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.14711v2-abstract-full').style.display = 'inline'; document.getElementById('2207.14711v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2207.14711v2-abstract-full" style="display: none;"> Modern software systems are often built by leveraging code written by others in the form of libraries and packages to accelerate their development. While there are many benefits to using third-party packages, software projects often become dependent on a large number of software packages. Consequently, developers are faced with the difficult challenge of maintaining their project dependencies by keeping them up-to-date and free of security vulnerabilities. However, how often are project dependencies used in production where they could pose a threat to their project&#39;s security? We conduct an empirical study on 100 JavaScript projects using the Node Package Manager (npm) to quantify how often project dependencies are released to production and analyze their characteristics and their impact on security. Our results indicate that less than 1% of the installed dependencies are released to production. Our analysis reveals that the functionality of a package is not enough to determine if it will be released to production or not. In fact, 59% of the installed dependencies configured as runtime dependencies are not used in production, and 28.2% of the dependencies configured as development dependencies are used in production, debunking two common assumptions of dependency management. Findings also indicate that most security alerts target dependencies not used in production, making them highly unlikely to be a risk for the security of the software. 
Our study unveils a more complex side of dependency management: not all dependencies are equal. Dependencies used in production are more sensitive to security exposure and should be prioritized. However, current tools lack the appropriate support in identifying production dependencies. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2207.14711v2-abstract-full').style.display = 'none'; document.getElementById('2207.14711v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 29 August, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 29 July, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2022. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">The 37th IEEE/ACM International Conference on Automated Software Engineering</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2206.10358">arXiv:2206.10358</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2206.10358">pdf</a>, <a href="https://arxiv.org/format/2206.10358">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> Open Source Software: An Approach to Controlling Usage and Risk in Application Ecosystems </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Zajdel%2C+S">Stan Zajdel</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Mili%2C+H">Hafedh Mili</a> </p> <p class="abstract mathjax"> 
<span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2206.10358v1-abstract-short" style="display: inline;"> The Open Source Software movement has been growing exponentially for a number of years with no signs of slowing. Driving this growth is the widespread availability of libraries and frameworks that provide many functionalities. Developers are saving time and money incorporating this functionality into their applications resulting in faster more feature-rich releases. Despite the growing success and&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.10358v1-abstract-full').style.display = 'inline'; document.getElementById('2206.10358v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2206.10358v1-abstract-full" style="display: none;"> The Open Source Software movement has been growing exponentially for a number of years with no signs of slowing. Driving this growth is the widespread availability of libraries and frameworks that provide many functionalities. Developers are saving time and money incorporating this functionality into their applications resulting in faster more feature-rich releases. Despite the growing success and the advantages that open source software provides, there is a dark side. Due to its community construction and largely unregulated distribution, the majority of open source software contains bugs, vulnerabilities and other issues making it highly susceptible to exploits. The lack of oversight, in general, hinders the quality of this software resulting in a trickle-down effect in the applications that use it. 
Additionally, developers who use open source tend to arbitrarily download the software into their build systems but rarely keep track of what they have downloaded resulting in an excessive amount of open source software in their applications and in their ecosystem. This paper discusses processes and practices that users of open source software can implement into their environments that can safely track and control the introduction and usage of open source software into their applications, and report on some preliminary results obtained in an industrial context. We conclude by discussing governance issues related to the disciplined use and reuse of open source and areas for further improvements. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2206.10358v1-abstract-full').style.display = 'none'; document.getElementById('2206.10358v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 21 June, 2022; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2022. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2110.15447">arXiv:2110.15447</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2110.15447">pdf</a>, <a href="https://arxiv.org/format/2110.15447">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1145/3530785">10.1145/3530785 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> On Wasted Contributions: Understanding the Dynamics of Contributor-Abandoned Pull Requests </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Khatoonabadi%2C+S">SayedHassan Khatoonabadi</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Abdalkareem%2C+R">Rabe Abdalkareem</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2110.15447v2-abstract-short" style="display: inline;"> Pull-based development has enabled numerous volunteers to contribute to open-source projects with fewer barriers. 
Nevertheless, a considerable amount of pull requests (PRs) with valid contributions are abandoned by their contributors, wasting the effort and time put in by both the contributors and maintainers. To better understand the underlying dynamics of contributor-abandoned PRs, we conduct a&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.15447v2-abstract-full').style.display = 'inline'; document.getElementById('2110.15447v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2110.15447v2-abstract-full" style="display: none;"> Pull-based development has enabled numerous volunteers to contribute to open-source projects with fewer barriers. Nevertheless, a considerable amount of pull requests (PRs) with valid contributions are abandoned by their contributors, wasting the effort and time put in by both the contributors and maintainers. To better understand the underlying dynamics of contributor-abandoned PRs, we conduct a mixed-methods study using both quantitative and qualitative methods. We curate a dataset consisting of 265,325 PRs including 4,450 abandoned ones from ten popular and mature GitHub projects and measure 16 features characterizing PRs, contributors, review processes, and projects. Using statistical and machine learning techniques, we find that complex PRs, novice contributors, and lengthy reviews have a higher probability of abandonment and the rate of PR abandonment fluctuates alongside the projects&#39; maturity or workload. To identify why contributors abandon their PRs, we also manually examine a random sample of 354 abandoned PRs. We observe that the most frequent abandonment reasons are related to the obstacles faced by contributors, followed by the hurdles imposed by maintainers during the review process. 
Finally, we survey the top core maintainers of the studied projects to understand their perspectives on dealing with PR abandonment and on our findings. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2110.15447v2-abstract-full').style.display = 'none'; document.getElementById('2110.15447v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 20 May, 2022; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 28 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Manuscript accepted for publication in ACM Transactions on Software Engineering and Methodology (TOSEM)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.13320">arXiv:2107.13320</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2107.13320">pdf</a>, <a href="https://arxiv.org/format/2107.13320">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> A Case Study on the Stability of Performance Tests for Serverless Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Eismann%2C+S">Simon Eismann</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Liao%2C+L">Lizhi Liao</a>, <a 
href="/search/cs?searchtype=author&amp;query=Bezemer%2C+C">Cor-Paul Bezemer</a>, <a href="/search/cs?searchtype=author&amp;query=Shang%2C+W">Weiyi Shang</a>, <a href="/search/cs?searchtype=author&amp;query=van+Hoorn%2C+A">Andr茅 van Hoorn</a>, <a href="/search/cs?searchtype=author&amp;query=Kounev%2C+S">Samuel Kounev</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.13320v1-abstract-short" style="display: inline;"> Context. While in serverless computing, application resource management and operational concerns are generally delegated to the cloud provider, ensuring that serverless applications meet their performance requirements is still a responsibility of the developers. Performance testing is a commonly used performance assessment practice; however, it traditionally requires visibility of the resource env&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.13320v1-abstract-full').style.display = 'inline'; document.getElementById('2107.13320v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.13320v1-abstract-full" style="display: none;"> Context. While in serverless computing, application resource management and operational concerns are generally delegated to the cloud provider, ensuring that serverless applications meet their performance requirements is still a responsibility of the developers. Performance testing is a commonly used performance assessment practice; however, it traditionally requires visibility of the resource environment. Objective. In this study, we investigate whether performance tests of serverless applications are stable, that is, if their results are reproducible, and what implications the serverless paradigm has for performance tests. Method. 
We conduct a case study where we collect two datasets of performance test results: (a) repetitions of performance tests for varying memory size and load intensities and (b) three repetitions of the same performance test every day for ten months. Results. We find that performance tests of serverless applications are comparatively stable if conducted on the same day. However, we also observe short-term performance variations and frequent long-term performance changes. Conclusion. Performance tests for serverless applications can be stable; however, the serverless model impacts the planning, execution, and analysis of performance tests. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.13320v1-abstract-full').style.display = 'none'; document.getElementById('2107.13320v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 28 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> C.4; D.2 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2107.10168">arXiv:2107.10168</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2107.10168">pdf</a>, <a href="https://arxiv.org/format/2107.10168">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TEM.2021.3122012">10.1109/TEM.2021.3122012 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Towards Using Package Centrality Trend to Identify Packages in Decline </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Mujahid%2C+S">Suhaib Mujahid</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Abdalkareem%2C+R">Rabe Abdalkareem</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a>, <a href="/search/cs?searchtype=author&amp;query=Saied%2C+M+A">Mohamed Aymen Saied</a>, <a href="/search/cs?searchtype=author&amp;query=Adams%2C+B">Bram Adams</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2107.10168v3-abstract-short" style="display: inline;"> Due to their increasing complexity, today&#39;s software systems are frequently built by leveraging reusable code in the form of libraries and packages. 
Software ecosystems (e.g., npm) are the primary enablers of this code reuse, providing developers with a platform to share their own and use others&#39; code. These ecosystems evolve rapidly: developers add new packages every day to solve new problems or&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.10168v3-abstract-full').style.display = 'inline'; document.getElementById('2107.10168v3-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2107.10168v3-abstract-full" style="display: none;"> Due to their increasing complexity, today&#39;s software systems are frequently built by leveraging reusable code in the form of libraries and packages. Software ecosystems (e.g., npm) are the primary enablers of this code reuse, providing developers with a platform to share their own and use others&#39; code. These ecosystems evolve rapidly: developers add new packages every day to solve new problems or provide alternative solutions, causing obsolete packages to decline in their importance to the community. Developers should avoid depending on packages in decline, as these packages are reused less over time and may become less frequently maintained. However, current popularity metrics (e.g., Stars, and Downloads) are not fit to provide this information to developers because their semantics do not aptly capture shifts in the community interest. In this paper, we propose a scalable approach that uses the package&#39;s centrality in the ecosystem to identify packages in decline. We evaluate our approach with the npm ecosystem and show that the trends of centrality over time can correctly distinguish packages in decline with an ROC-AUC of 0.9. The approach can capture 87% of the packages in decline, on average 18 months before the trend is shown in currently used package popularity metrics. 
We implement this approach in a tool that can be used to augment the npms metrics and help developers avoid packages in decline when reusing packages from npm. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2107.10168v3-abstract-full').style.display = 'none'; document.getElementById('2107.10168v3-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 19 October, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 21 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> July 2021. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Accepted in the Special Issue on Collaboration and Innovation Dynamics in Software Ecosystems</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">ACM Class:</span> D.2.0; D.2.13 </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Engineering Management Journal (TEM), 2021 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2012.02640">arXiv:2012.02640</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2012.02640">pdf</a>, <a href="https://arxiv.org/format/2012.02640">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> <div class="is-inline-block" style="margin-left: 
0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSE.2021.3078384">10.1109/TSE.2021.3078384 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> A Comparison of Natural Language Understanding Platforms for Chatbots in Software Engineering </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Abdellatif%2C+A">Ahmad Abdellatif</a>, <a href="/search/cs?searchtype=author&amp;query=Badran%2C+K">Khaled Badran</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2012.02640v2-abstract-short" style="display: inline;"> Chatbots are envisioned to dramatically change the future of Software Engineering, allowing practitioners to chat and inquire about their software projects and interact with different services using natural language. At the heart of every chatbot is a Natural Language Understanding (NLU) component that enables the chatbot to understand natural language input. 
Recently, many NLU platforms were prov&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.02640v2-abstract-full').style.display = 'inline'; document.getElementById('2012.02640v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2012.02640v2-abstract-full" style="display: none;"> Chatbots are envisioned to dramatically change the future of Software Engineering, allowing practitioners to chat and inquire about their software projects and interact with different services using natural language. At the heart of every chatbot is a Natural Language Understanding (NLU) component that enables the chatbot to understand natural language input. Recently, many NLU platforms were provided to serve as an off-the-shelf NLU component for chatbots; however, selecting the best NLU for Software Engineering chatbots remains an open challenge. Therefore, in this paper, we evaluate four of the most commonly used NLUs, namely IBM Watson, Google Dialogflow, Rasa, and Microsoft LUIS, to shed light on which NLU should be used in Software Engineering-based chatbots. Specifically, we examine the NLUs&#39; performance in classifying intents, confidence score stability, and extracting entities. To evaluate the NLUs, we use two datasets that reflect two common tasks performed by Software Engineering practitioners: 1) the task of chatting with the chatbot to ask questions about software repositories, and 2) the task of asking development questions on Q&amp;A forums (e.g., Stack Overflow). According to our findings, IBM Watson is the best performing NLU when considering the three aspects (intents classification, confidence scores, and entity extraction). 
However, the results from each individual aspect show that, in intents classification, IBM Watson performs the best with an F1-measure &gt; 84%, but in confidence scores, Rasa comes on top with a median confidence score higher than 0.91. Our results also show that all NLUs, except for Dialogflow, generally provide trustable confidence scores. For entity extraction, Microsoft LUIS and IBM Watson outperform other NLUs in the two SE tasks. Our results provide guidance to software engineering practitioners when deciding which NLU to use in their chatbots. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2012.02640v2-abstract-full').style.display = 'none'; document.getElementById('2012.02640v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 4 December, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> December 2020. 
</p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Software Engineering (TSE), 2021 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2010.14573">arXiv:2010.14573</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2010.14573">pdf</a>, <a href="https://arxiv.org/format/2010.14573">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSE.2021.3106247">10.1109/TSE.2021.3106247 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Dependency Smells in JavaScript Projects </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Jafari%2C+A+J">Abbas Javan Jafari</a>, <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Abdalkareem%2C+R">Rabe Abdalkareem</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a>, <a href="/search/cs?searchtype=author&amp;query=Tsantalis%2C+N">Nikolaos Tsantalis</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2010.14573v2-abstract-short" style="display: inline;"> Dependency management in modern software development poses many challenges for developers who wish to stay up to date with the latest features and fixes whilst ensuring backwards compatibility. 
Project maintainers have opted for varied, and sometimes conflicting, approaches for maintaining their dependencies. Opting for unsuitable approaches can introduce bugs and vulnerabilities into the project,&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.14573v2-abstract-full').style.display = 'inline'; document.getElementById('2010.14573v2-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2010.14573v2-abstract-full" style="display: none;"> Dependency management in modern software development poses many challenges for developers who wish to stay up to date with the latest features and fixes whilst ensuring backwards compatibility. Project maintainers have opted for varied, and sometimes conflicting, approaches for maintaining their dependencies. Opting for unsuitable approaches can introduce bugs and vulnerabilities into the project, introduce breaking changes, cause extraneous installations, and reduce dependency understandability, making it harder for others to contribute effectively. In this paper, we empirically examine evidence of recurring dependency management issues (dependency smells). We look at the commit data for a dataset of 1,146 active JavaScript repositories to catalog, quantify and understand dependency smells. Through a series of surveys with practitioners, we identify and quantify seven dependency smells with varying degrees of popularity and investigate why they are introduced throughout project history. Our findings indicate that dependency smells are prevalent in JavaScript projects, with two or more distinct smells appearing in 80% of the projects, but they generally infect a minority of a project&#39;s dependencies. Our observations show that the number of dependency smells tends to increase over time. 
Practitioners agree that dependency smells bring about many problems including security threats, bugs, dependency breakage, runtime errors, and other maintenance issues. These smells are generally introduced as developers react to dependency misbehaviour and the shortcomings of the npm ecosystem. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2010.14573v2-abstract-full').style.display = 'none'; document.getElementById('2010.14573v2-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 August, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 27 October, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">Keywords: Dependency smells, Software ecosystems, Dependency management, npm</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Software Engineering (TSE), 2021 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2009.09019">arXiv:2009.09019</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2009.09019">pdf</a>, <a href="https://arxiv.org/format/2009.09019">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> </div> </div> <p class="title is-5 mathjax"> On the Threat of npm Vulnerable Dependencies in Node.js Applications </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Alfadel%2C+M">Mahmoud Alfadel</a>, <a 
href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Mokhallalati%2C+M">Mouafak Mokhallalati</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a>, <a href="/search/cs?searchtype=author&amp;query=Adams%2C+B">Bram Adams</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2009.09019v1-abstract-short" style="display: inline;"> Software vulnerabilities have a large negative impact on the software systems that we depend on daily. Reports on software vulnerabilities always paint a grim picture, with some reports showing that 83% of organizations depend on vulnerable software. However, our experience leads us to believe that, in the grand scheme of things, these software vulnerabilities may have less impact than what is rep&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.09019v1-abstract-full').style.display = 'inline'; document.getElementById('2009.09019v1-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2009.09019v1-abstract-full" style="display: none;"> Software vulnerabilities have a large negative impact on the software systems that we depend on daily. Reports on software vulnerabilities always paint a grim picture, with some reports showing that 83% of organizations depend on vulnerable software. However, our experience leads us to believe that, in the grand scheme of things, these software vulnerabilities may have less impact than what is reported. Therefore, we perform a study to better understand the threat of npm vulnerable packages used in Node.js applications. 
We define three threat levels for vulnerabilities in packages, based on their lifecycle, where a package vulnerability is assigned a low threat level if it was hidden or still unknown at the time it was used in the dependent application (t), medium threat level if the vulnerability was reported but not yet published at t, and high if it was publicly announced at t. Then, we perform an empirical study involving 6,673 real-world, active, and mature open source Node.js applications. Our findings show that although 67.93% of the examined applications depend on at least one vulnerable package, 94.91% of the vulnerable packages in those affected applications are classified as having low threat. Moreover, we find that in the case of vulnerable packages classified as having high threat, it is the application&#39;s lack of updating that makes them vulnerable, i.e., it is not the existence of the vulnerability that is the real problem. Furthermore, we verify our findings at different stages of the application&#39;s lifetime and find that our findings still hold. Our study argues that when it comes to software vulnerabilities, things may not be as bad as they seem and that considering vulnerability threat is key. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2009.09019v1-abstract-full').style.display = 'none'; document.getElementById('2009.09019v1-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 18 September, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> September 2020. 
</p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2006.09973">arXiv:2006.09973</a> <span>&nbsp;[<a href="https://arxiv.org/pdf/2006.09973">pdf</a>, <a href="https://arxiv.org/format/2006.09973">other</a>]&nbsp;</span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Software Engineering">cs.SE</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/TSE.2021.3057720">10.1109/TSE.2021.3057720 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Breaking Type Safety in Go: An Empirical Study on the Usage of the unsafe Package </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&amp;query=Costa%2C+D+E">Diego Elias Costa</a>, <a href="/search/cs?searchtype=author&amp;query=Mujahid%2C+S">Suhaib Mujahid</a>, <a href="/search/cs?searchtype=author&amp;query=Abdalkareem%2C+R">Rabe Abdalkareem</a>, <a href="/search/cs?searchtype=author&amp;query=Shihab%2C+E">Emad Shihab</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2006.09973v4-abstract-short" style="display: inline;"> A decade after its first release, the Go programming language has become a major programming language in the development landscape. 
While praised for its clean syntax and C-like performance, Go also contains a strong static type-system that prevents arbitrary type casting and arbitrary memory access, making the language type-safe by design. However, to give developers the possibility of implementi&hellip; <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.09973v4-abstract-full').style.display = 'inline'; document.getElementById('2006.09973v4-abstract-short').style.display = 'none';">&#9661; More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2006.09973v4-abstract-full" style="display: none;"> A decade after its first release, the Go programming language has become a major programming language in the development landscape. While praised for its clean syntax and C-like performance, Go also contains a strong static type-system that prevents arbitrary type casting and arbitrary memory access, making the language type-safe by design. However, to give developers the possibility of implementing low-level code, Go ships with a special package called unsafe that offers developers a way around the type-safety of Go programs. The package gives greater flexibility to developers but comes at a higher risk of runtime errors, chances of non-portability, and the loss of compatibility guarantees for future versions of Go. In this paper, we present the first large-scale study on the usage of the unsafe package in 2,438 popular Go projects. Our investigation shows that unsafe is used in 24% of Go projects, motivated primarily by communicating with operating systems and C code, but is also commonly used as a source of performance optimization. Developers are willing to use unsafe to break language specifications (e.g., string immutability) for better performance and 6% of analyzed projects that use unsafe perform risky pointer conversions that can lead to program crashes and unexpected behavior. 
Furthermore, we report a series of real issues faced by projects that use unsafe, from crashing errors and non-deterministic behavior to having their deployment restricted from certain popular environments. Our findings can be used to understand how and why developers break type-safety in Go, and help motivate further tools and language development that could make the usage of unsafe in Go even safer. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2006.09973v4-abstract-full').style.display = 'none'; document.getElementById('2006.09973v4-abstract-short').style.display = 'inline';">&#9651; Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 17 June, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2020. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> IEEE Transactions on Software Engineering (TSE), 2021 </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a>&nbsp;&nbsp;</span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 
48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" 
href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script 
src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>
