CINXE.COM
Search | arXiv e-print repository
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"/> <meta name="viewport" content="width=device-width, initial-scale=1"/> <!-- new favicon config and versions by realfavicongenerator.net --> <link rel="apple-touch-icon" sizes="180x180" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/apple-touch-icon.png"> <link rel="icon" type="image/png" sizes="32x32" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-32x32.png"> <link rel="icon" type="image/png" sizes="16x16" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon-16x16.png"> <link rel="manifest" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/site.webmanifest"> <link rel="mask-icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/safari-pinned-tab.svg" color="#b31b1b"> <link rel="shortcut icon" href="https://static.arxiv.org/static/base/1.0.0a5/images/icons/favicon.ico"> <meta name="msapplication-TileColor" content="#b31b1b"> <meta name="msapplication-config" content="images/icons/browserconfig.xml"> <meta name="theme-color" content="#b31b1b"> <!-- end favicon config --> <title>Search | arXiv e-print repository</title> <script defer src="https://static.arxiv.org/static/base/1.0.0a5/fontawesome-free-5.11.2-web/js/all.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/base/1.0.0a5/css/arxivstyle.css" /> <script type="text/x-mathjax-config"> MathJax.Hub.Config({ messageStyle: "none", extensions: ["tex2jax.js"], jax: ["input/TeX", "output/HTML-CSS"], tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, ignoreClass: '.*', processClass: 'mathjax.*' }, TeX: { extensions: ["AMSmath.js", "AMSsymbols.js", "noErrors.js"], noErrors: { inlineDelimiters: ["$","$"], multiLine: false, style: { "font-size": "normal", "border": "" } } }, "HTML-CSS": { availableFonts: ["TeX"] } }); </script> <script src='//static.arxiv.org/MathJax-2.7.3/MathJax.js'></script> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/notification.js"></script> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/bulma-tooltip.min.css" /> <link rel="stylesheet" href="https://static.arxiv.org/static/search/0.5.6/css/search.css" /> <script src="https://code.jquery.com/jquery-3.2.1.slim.min.js" integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g=" crossorigin="anonymous"></script> <script src="https://static.arxiv.org/static/search/0.5.6/js/fieldset.js"></script> <style> radio#cf-customfield_11400 { display: none; } </style> </head> <body> <header><a href="#main-container" class="is-sr-only">Skip to main content</a> <!-- contains Cornell logo and sponsor statement --> <div class="attribution level is-marginless" role="banner"> <div class="level-left"> <a class="level-item" href="https://cornell.edu/"><img src="https://static.arxiv.org/static/base/1.0.0a5/images/cornell-reduced-white-SMALL.svg" alt="Cornell University" width="200" aria-label="logo" /></a> </div> <div class="level-right is-marginless"><p class="sponsors level-item is-marginless"><span id="support-ack-url">We gratefully acknowledge support from<br /> the Simons Foundation, <a href="https://info.arxiv.org/about/ourmembers.html">member institutions</a>, and all contributors. <a href="https://info.arxiv.org/about/donate.html">Donate</a></span></p></div> </div> <!-- contains arXiv identity and search bar --> <div class="identity level is-marginless"> <div class="level-left"> <div class="level-item"> <a class="arxiv" href="https://arxiv.org/" aria-label="arxiv-logo"> <img src="https://static.arxiv.org/static/base/1.0.0a5/images/arxiv-logo-one-color-white.svg" aria-label="logo" alt="arxiv logo" width="85" style="width:85px;"/> </a> </div> </div> <div class="search-block level-right"> <form class="level-item mini-search" method="GET" action="https://arxiv.org/search"> <div class="field has-addons"> <div class="control"> <input class="input is-small" type="text" name="query" placeholder="Search..." aria-label="Search term or terms" /> <p class="help"><a href="https://info.arxiv.org/help">Help</a> | <a href="https://arxiv.org/search/advanced">Advanced Search</a></p> </div> <div class="control"> <div class="select is-small"> <select name="searchtype" aria-label="Field to search"> <option value="all" selected="selected">All fields</option> <option value="title">Title</option> <option value="author">Author</option> <option value="abstract">Abstract</option> <option value="comments">Comments</option> <option value="journal_ref">Journal reference</option> <option value="acm_class">ACM classification</option> <option value="msc_class">MSC classification</option> <option value="report_num">Report number</option> <option value="paper_id">arXiv identifier</option> <option value="doi">DOI</option> <option value="orcid">ORCID</option> <option value="author_id">arXiv author ID</option> <option value="help">Help pages</option> <option value="full_text">Full text</option> </select> </div> </div> <input type="hidden" name="source" value="header"> <button class="button is-small is-cul-darker">Search</button> </div> </form> </div> </div> <!-- closes identity --> <div class="container"> <div class="user-tools is-size-7 has-text-right has-text-weight-bold" role="navigation" aria-label="User menu"> <a href="https://arxiv.org/login">Login</a> </div> </div> </header> <main class="container" id="main-container"> <div class="level is-marginless"> <div class="level-left"> <h1 class="title is-clearfix"> Showing 1–10 of 10 results for author: <span class="mathjax">Genc, H</span> </h1> </div> <div class="level-right is-hidden-mobile"> <!-- feedback for mobile is moved to footer --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> <div class="content"> <form method="GET" action="/search/cs" aria-role="search"> Searching in archive <strong>cs</strong>. <a href="/search/?searchtype=author&query=Genc%2C+H">Search in all archives.</a> <div class="field has-addons-tablet"> <div class="control is-expanded"> <label for="query" class="hidden-label">Search term or terms</label> <input class="input is-medium" id="query" name="query" placeholder="Search term..." type="text" value="Genc, H"> </div> <div class="select control is-medium"> <label class="is-hidden" for="searchtype">Field</label> <select class="is-medium" id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> </div> <div class="control"> <button class="button is-link is-medium">Search</button> </div> </div> <div class="field"> <div class="control is-size-7"> <label class="radio"> <input checked id="abstracts-0" name="abstracts" type="radio" value="show"> Show abstracts </label> <label class="radio"> <input id="abstracts-1" name="abstracts" type="radio" value="hide"> Hide abstracts </label> </div> </div> <div class="is-clearfix" style="height: 2.5em"> <div class="is-pulled-right"> <a href="/search/advanced?terms-0-term=Genc%2C+H&terms-0-field=author&size=50&order=-announced_date_first">Advanced Search</a> </div> </div> <input type="hidden" name="order" value="-announced_date_first"> <input type="hidden" name="size" value="50"> </form> <div class="level breathe-horizontal"> <div class="level-left"> <form method="GET" action="/search/"> <div style="display: none;"> <select id="searchtype" name="searchtype"><option value="all">All fields</option><option value="title">Title</option><option selected value="author">Author(s)</option><option value="abstract">Abstract</option><option value="comments">Comments</option><option value="journal_ref">Journal reference</option><option value="acm_class">ACM classification</option><option value="msc_class">MSC classification</option><option value="report_num">Report number</option><option value="paper_id">arXiv identifier</option><option value="doi">DOI</option><option value="orcid">ORCID</option><option value="license">License (URI)</option><option value="author_id">arXiv author ID</option><option value="help">Help pages</option><option value="full_text">Full text</option></select> <input id="query" name="query" type="text" value="Genc, H"> <ul id="abstracts"><li><input checked id="abstracts-0" name="abstracts" type="radio" value="show"> <label for="abstracts-0">Show abstracts</label></li><li><input id="abstracts-1" name="abstracts" type="radio" value="hide"> <label for="abstracts-1">Hide abstracts</label></li></ul> </div> <div class="box field is-grouped is-grouped-multiline level-item"> <div class="control"> <span class="select is-small"> <select id="size" name="size"><option value="25">25</option><option selected value="50">50</option><option value="100">100</option><option value="200">200</option></select> </span> <label for="size">results per page</label>. </div> <div class="control"> <label for="order">Sort results by</label> <span class="select is-small"> <select id="order" name="order"><option selected value="-announced_date_first">Announcement date (newest first)</option><option value="announced_date_first">Announcement date (oldest first)</option><option value="-submitted_date">Submission date (newest first)</option><option value="submitted_date">Submission date (oldest first)</option><option value="">Relevance</option></select> </span> </div> <div class="control"> <button class="button is-small is-link">Go</button> </div> </div> </form> </div> </div> <ol class="breathe-horizontal" start="1"> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2405.13465">arXiv:2405.13465</a> <span> [<a href="https://arxiv.org/pdf/2405.13465">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Human-Computer Interaction">cs.HC</span> </div> </div> <p class="title is-5 mathjax"> Designing for Rich Collocated Social Interactions in the Age of Smartphones </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Gen%C3%A7%2C+H+U">H眉seyin U臒ur Gen莽</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2405.13465v1-abstract-short" style="display: inline;"> The quality of social interaction is crucial for psychological and physiological health. Previous research shows that smartphones can negatively impact face-to-face social interactions. Many HCI studies have addressed this by limiting smartphone use during social interactions. While these studies show a decrease in smartphone use, restrictive approaches have their drawbacks. Users need high levels… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13465v1-abstract-full').style.display = 'inline'; document.getElementById('2405.13465v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2405.13465v1-abstract-full" style="display: none;"> The quality of social interaction is crucial for psychological and physiological health. Previous research shows that smartphones can negatively impact face-to-face social interactions. Many HCI studies have addressed this by limiting smartphone use during social interactions. While these studies show a decrease in smartphone use, restrictive approaches have their drawbacks. Users need high levels of self-regulation to follow them, and they may cause unintended effects like withdrawal symptoms. Given the impact of smartphones on social interactions, both positive and negative, new solutions are needed to reduce the negative effects of excessive smartphone use without resorting to restrictive methods. This thesis aims to explore smartphone use behavior in the context of social interactions and relationships using various data collection techniques to understand how this behavior hinders and supports social interactions. We began with in situ observations and focus group sessions. Based on insights from these steps, we developed two research prototypes to improve social interactions without restricting smartphone use. We gathered user feedback, reactions, and concerns about these prototypes through user studies. Finally, we evaluated how these prototypes affected conversation quality in social interactions through an experimental user study. This thesis contributes to the field of digital well-being by offering user insights, design implications, and approaches that can guide the creation of solutions to enhance social interactions in the presence of smartphones. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2405.13465v1-abstract-full').style.display = 'none'; document.getElementById('2405.13465v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 22 May, 2024; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2024. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">157 Pages, 20 figures, 5 Tables, PhD Thesis</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2310.12072">arXiv:2310.12072</a> <span> [<a href="https://arxiv.org/pdf/2310.12072">pdf</a>, <a href="https://arxiv.org/format/2310.12072">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> </div> </div> <p class="title is-5 mathjax"> SPEED: Speculative Pipelined Execution for Efficient Decoding </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Hooper%2C+C">Coleman Hooper</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+S">Sehoon Kim</a>, <a href="/search/cs?searchtype=author&query=Mohammadzadeh%2C+H">Hiva Mohammadzadeh</a>, <a href="/search/cs?searchtype=author&query=Genc%2C+H">Hasan Genc</a>, <a href="/search/cs?searchtype=author&query=Keutzer%2C+K">Kurt Keutzer</a>, <a href="/search/cs?searchtype=author&query=Gholami%2C+A">Amir Gholami</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+S">Sophia Shao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2310.12072v2-abstract-short" style="display: inline;"> Generative Large Language Models (LLMs) based on the Transformer architecture have recently emerged as a dominant foundation model for a wide range of Natural Language Processing tasks. Nevertheless, their application in real-time scenarios has been highly restricted due to the significant inference latency associated with these models. This is particularly pronounced due to the autoregressive nat… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.12072v2-abstract-full').style.display = 'inline'; document.getElementById('2310.12072v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2310.12072v2-abstract-full" style="display: none;"> Generative Large Language Models (LLMs) based on the Transformer architecture have recently emerged as a dominant foundation model for a wide range of Natural Language Processing tasks. Nevertheless, their application in real-time scenarios has been highly restricted due to the significant inference latency associated with these models. This is particularly pronounced due to the autoregressive nature of generative LLM inference, where tokens are generated sequentially since each token depends on all previous output tokens. It is therefore challenging to achieve any token-level parallelism, making inference extremely memory-bound. In this work, we propose SPEED, which improves inference efficiency by speculatively executing multiple future tokens in parallel with the current token using predicted values based on early-layer hidden states. For Transformer decoders that employ parameter sharing, the memory operations for the tokens executing in parallel can be amortized, which allows us to accelerate generative LLM inference. We demonstrate the efficiency of our method in terms of latency reduction relative to model accuracy and demonstrate how speculation allows for training deeper decoders with parameter sharing with minimal runtime overhead. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2310.12072v2-abstract-full').style.display = 'none'; document.getElementById('2310.12072v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 2 January, 2024; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 18 October, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">NeurIPS Workshop on Efficient Natural Language and Speech Processing (2023)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2308.06410">arXiv:2308.06410</a> <span> [<a href="https://arxiv.org/pdf/2308.06410">pdf</a>, <a href="https://arxiv.org/ps/2308.06410">ps</a>, <a href="https://arxiv.org/format/2308.06410">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> </div> </div> <p class="title is-5 mathjax"> Code Transpilation for Hardware Accelerators </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Nishida%2C+Y">Yuto Nishida</a>, <a href="/search/cs?searchtype=author&query=Bhatia%2C+S">Sahil Bhatia</a>, <a href="/search/cs?searchtype=author&query=Laddad%2C+S">Shadaj Laddad</a>, <a href="/search/cs?searchtype=author&query=Genc%2C+H">Hasan Genc</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Y+S">Yakun Sophia Shao</a>, <a href="/search/cs?searchtype=author&query=Cheung%2C+A">Alvin Cheung</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2308.06410v1-abstract-short" style="display: inline;"> DSLs and hardware accelerators have proven to be very effective in optimizing computationally expensive workloads. In this paper, we propose a solution to the challenge of manually rewriting legacy or unoptimized code in domain-specific languages and hardware accelerators. We introduce an approach that integrates two open-source tools: Metalift, a code translation framework, and Gemmini, a DNN acc… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.06410v1-abstract-full').style.display = 'inline'; document.getElementById('2308.06410v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2308.06410v1-abstract-full" style="display: none;"> DSLs and hardware accelerators have proven to be very effective in optimizing computationally expensive workloads. In this paper, we propose a solution to the challenge of manually rewriting legacy or unoptimized code in domain-specific languages and hardware accelerators. We introduce an approach that integrates two open-source tools: Metalift, a code translation framework, and Gemmini, a DNN accelerator generator. The integration of these two tools offers significant benefits, including simplified workflows for developers to run legacy code on Gemmini generated accelerators and a streamlined programming stack for Gemmini that reduces the effort required to add new instructions. This paper provides details on this integration and its potential to simplify and optimize computationally expensive workloads. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2308.06410v1-abstract-full').style.display = 'none'; document.getElementById('2308.06410v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 11 August, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> August 2023. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2305.05843">arXiv:2305.05843</a> <span> [<a href="https://arxiv.org/pdf/2305.05843">pdf</a>, <a href="https://arxiv.org/format/2305.05843">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/HPCA56546.2023.10071035">10.1109/HPCA56546.2023.10071035 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> MoCA: Memory-Centric, Adaptive Execution for Multi-Tenant Deep Neural Networks </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+S">Seah Kim</a>, <a href="/search/cs?searchtype=author&query=Genc%2C+H">Hasan Genc</a>, <a href="/search/cs?searchtype=author&query=Nikiforov%2C+V+V">Vadim Vadimovich Nikiforov</a>, <a href="/search/cs?searchtype=author&query=Asanovi%C4%87%2C+K">Krste Asanovi膰</a>, <a href="/search/cs?searchtype=author&query=Nikoli%C4%87%2C+B">Borivoje Nikoli膰</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Y+S">Yakun Sophia Shao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2305.05843v1-abstract-short" style="display: inline;"> Driven by the wide adoption of deep neural networks (DNNs) across different application domains, multi-tenancy execution, where multiple DNNs are deployed simultaneously on the same hardware, has been proposed to satisfy the latency requirements of different applications while improving the overall system utilization. However, multi-tenancy execution could lead to undesired system-level resource c… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.05843v1-abstract-full').style.display = 'inline'; document.getElementById('2305.05843v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2305.05843v1-abstract-full" style="display: none;"> Driven by the wide adoption of deep neural networks (DNNs) across different application domains, multi-tenancy execution, where multiple DNNs are deployed simultaneously on the same hardware, has been proposed to satisfy the latency requirements of different applications while improving the overall system utilization. However, multi-tenancy execution could lead to undesired system-level resource contention, causing quality-of-service (QoS) degradation for latency-critical applications. To address this challenge, we propose MoCA, an adaptive multi-tenancy system for DNN accelerators. Unlike existing solutions that focus on compute resource partition, MoCA dynamically manages shared memory resources of co-located applications to meet their QoS targets. Specifically, MoCA leverages the regularities in both DNN operators and accelerators to dynamically modulate memory access rates based on their latency targets and user-defined priorities so that co-located applications get the resources they demand without significantly starving their co-runners. We demonstrate that MoCA improves the satisfaction rate of the service level agreement (SLA) up to 3.9x (1.8x average), system throughput by 2.3x (1.7x average), and fairness by 1.3x (1.2x average), compared to prior work. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2305.05843v1-abstract-full').style.display = 'none'; document.getElementById('2305.05843v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 May, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">2023 HPCA, Reproducibility Badges (Open Research Objects, Research Objects Reviewed, Results Reproduced)</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2302.14017">arXiv:2302.14017</a> <span> [<a href="https://arxiv.org/pdf/2302.14017">pdf</a>, <a href="https://arxiv.org/format/2302.14017">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computation and Language">cs.CL</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> </div> </div> <p class="title is-5 mathjax"> Full Stack Optimization of Transformer Inference: a Survey </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Kim%2C+S">Sehoon Kim</a>, <a href="/search/cs?searchtype=author&query=Hooper%2C+C">Coleman Hooper</a>, <a href="/search/cs?searchtype=author&query=Wattanawong%2C+T">Thanakul Wattanawong</a>, <a href="/search/cs?searchtype=author&query=Kang%2C+M">Minwoo Kang</a>, <a href="/search/cs?searchtype=author&query=Yan%2C+R">Ruohan Yan</a>, <a href="/search/cs?searchtype=author&query=Genc%2C+H">Hasan Genc</a>, <a href="/search/cs?searchtype=author&query=Dinh%2C+G">Grace Dinh</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Q">Qijing Huang</a>, <a href="/search/cs?searchtype=author&query=Keutzer%2C+K">Kurt Keutzer</a>, <a href="/search/cs?searchtype=author&query=Mahoney%2C+M+W">Michael W. Mahoney</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Y+S">Yakun Sophia Shao</a>, <a href="/search/cs?searchtype=author&query=Gholami%2C+A">Amir Gholami</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2302.14017v1-abstract-short" style="display: inline;"> Recent advances in state-of-the-art DNN architecture design have been moving toward Transformer models. These models achieve superior accuracy across a wide range of applications. This trend has been consistent over the past several years since Transformer models were originally introduced. However, the amount of compute and bandwidth required for inference of recent Transformer models is growing… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.14017v1-abstract-full').style.display = 'inline'; document.getElementById('2302.14017v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2302.14017v1-abstract-full" style="display: none;"> Recent advances in state-of-the-art DNN architecture design have been moving toward Transformer models. These models achieve superior accuracy across a wide range of applications. This trend has been consistent over the past several years since Transformer models were originally introduced. However, the amount of compute and bandwidth required for inference of recent Transformer models is growing at a significant rate, and this has made their deployment in latency-sensitive applications challenging. As such, there has been an increased focus on making Transformer models more efficient, with methods that range from changing the architecture design, all the way to developing dedicated domain-specific accelerators. In this work, we survey different approaches for efficient Transformer inference, including: (i) analysis and profiling of the bottlenecks in existing Transformer architectures and their similarities and differences with previous convolutional models; (ii) implications of Transformer architecture on hardware, including the impact of non-linear operations such as Layer Normalization, Softmax, and GELU, as well as linear operations, on hardware design; (iii) approaches for optimizing a fixed Transformer architecture; (iv) challenges in finding the right mapping and scheduling of operations for Transformer models; and (v) approaches for optimizing Transformer models by adapting the architecture using neural architecture search. Finally, we perform a case study by applying the surveyed optimizations on Gemmini, the open-source, full-stack DNN accelerator generator, and we show how each of these approaches can yield improvements, compared to previous benchmark results on Gemmini. Among other things, we find that a full-stack co-design approach with the aforementioned methods can result in up to 88.7x speedup with a minimal performance degradation for Transformer inference. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2302.14017v1-abstract-full').style.display = 'none'; document.getElementById('2302.14017v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 February, 2023; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> February 2023. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> Presented in Workshop on Architecture and System Support for Transformer Models (ASSYST) at ISCA 2023 </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/2005.13685">arXiv:2005.13685</a> <span> [<a href="https://arxiv.org/pdf/2005.13685">pdf</a>, <a href="https://arxiv.org/format/2005.13685">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Artificial Intelligence">cs.AI</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Programming Languages">cs.PL</span> </div> </div> <p class="title is-5 mathjax"> ProTuner: Tuning Programs with Monte Carlo Tree Search </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Haj-Ali%2C+A">Ameer Haj-Ali</a>, <a href="/search/cs?searchtype=author&query=Genc%2C+H">Hasan Genc</a>, <a href="/search/cs?searchtype=author&query=Huang%2C+Q">Qijing Huang</a>, <a href="/search/cs?searchtype=author&query=Moses%2C+W">William Moses</a>, <a href="/search/cs?searchtype=author&query=Wawrzynek%2C+J">John Wawrzynek</a>, <a href="/search/cs?searchtype=author&query=Asanovi%C4%87%2C+K">Krste Asanovi膰</a>, <a href="/search/cs?searchtype=author&query=Stoica%2C+I">Ion Stoica</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="2005.13685v1-abstract-short" style="display: inline;"> We explore applying the Monte Carlo Tree Search (MCTS) algorithm in a notoriously difficult task: tuning programs for high-performance deep learning and image processing. We build our framework on top of Halide and show that MCTS can outperform the state-of-the-art beam-search algorithm. Unlike beam search, which is guided by greedy intermediate performance comparisons between partial and less mea… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.13685v1-abstract-full').style.display = 'inline'; document.getElementById('2005.13685v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="2005.13685v1-abstract-full" style="display: none;"> We explore applying the Monte Carlo Tree Search (MCTS) algorithm in a notoriously difficult task: tuning programs for high-performance deep learning and image processing. We build our framework on top of Halide and show that MCTS can outperform the state-of-the-art beam-search algorithm. Unlike beam search, which is guided by greedy intermediate performance comparisons between partial and less meaningful schedules, MCTS compares complete schedules and looks ahead before making any intermediate scheduling decision. We further explore modifications to the standard MCTS algorithm as well as combining real execution time measurements with the cost model. Our results show that MCTS can outperform beam search on a suite of 16 real benchmarks. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('2005.13685v1-abstract-full').style.display = 'none'; document.getElementById('2005.13685v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 27 May, 2020; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2020. </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1911.09925">arXiv:1911.09925</a> <span> [<a href="https://arxiv.org/pdf/1911.09925">pdf</a>, <a href="https://arxiv.org/format/1911.09925">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Distributed, Parallel, and Cluster Computing">cs.DC</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Hardware Architecture">cs.AR</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Machine Learning">cs.LG</span> <span class="tag is-small is-grey tooltip is-tooltip-top" data-tooltip="Performance">cs.PF</span> </div> </div> <p class="title is-5 mathjax"> Gemmini: Enabling Systematic Deep-Learning Architecture Evaluation via Full-Stack Integration </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Genc%2C+H">Hasan Genc</a>, <a href="/search/cs?searchtype=author&query=Kim%2C+S">Seah Kim</a>, <a href="/search/cs?searchtype=author&query=Amid%2C+A">Alon Amid</a>, <a href="/search/cs?searchtype=author&query=Haj-Ali%2C+A">Ameer Haj-Ali</a>, <a href="/search/cs?searchtype=author&query=Iyer%2C+V">Vighnesh Iyer</a>, <a href="/search/cs?searchtype=author&query=Prakash%2C+P">Pranav Prakash</a>, <a href="/search/cs?searchtype=author&query=Zhao%2C+J">Jerry Zhao</a>, <a href="/search/cs?searchtype=author&query=Grubb%2C+D">Daniel Grubb</a>, <a href="/search/cs?searchtype=author&query=Liew%2C+H">Harrison Liew</a>, <a href="/search/cs?searchtype=author&query=Mao%2C+H">Howard Mao</a>, <a href="/search/cs?searchtype=author&query=Ou%2C+A">Albert Ou</a>, <a href="/search/cs?searchtype=author&query=Schmidt%2C+C">Colin Schmidt</a>, <a href="/search/cs?searchtype=author&query=Steffl%2C+S">Samuel Steffl</a>, <a href="/search/cs?searchtype=author&query=Wright%2C+J">John Wright</a>, <a href="/search/cs?searchtype=author&query=Stoica%2C+I">Ion Stoica</a>, <a href="/search/cs?searchtype=author&query=Ragan-Kelley%2C+J">Jonathan Ragan-Kelley</a>, <a href="/search/cs?searchtype=author&query=Asanovic%2C+K">Krste Asanovic</a>, <a href="/search/cs?searchtype=author&query=Nikolic%2C+B">Borivoje Nikolic</a>, <a href="/search/cs?searchtype=author&query=Shao%2C+Y+S">Yakun Sophia Shao</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1911.09925v3-abstract-short" style="display: inline;"> DNN accelerators are often developed and evaluated in isolation without considering the cross-stack, system-level effects in real-world environments. This makes it difficult to appreciate the impact of System-on-Chip (SoC) resource contention, OS overheads, and programming-stack inefficiencies on overall performance/energy-efficiency. To address this challenge, we present Gemmini, an open-source*,… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.09925v3-abstract-full').style.display = 'inline'; document.getElementById('1911.09925v3-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1911.09925v3-abstract-full" style="display: none;"> DNN accelerators are often developed and evaluated in isolation without considering the cross-stack, system-level effects in real-world environments. This makes it difficult to appreciate the impact of System-on-Chip (SoC) resource contention, OS overheads, and programming-stack inefficiencies on overall performance/energy-efficiency. To address this challenge, we present Gemmini, an open-source*, full-stack DNN accelerator generator. Gemmini generates a wide design-space of efficient ASIC accelerators from a flexible architectural template, together with flexible programming stacks and full SoCs with shared resources that capture system-level effects. Gemmini-generated accelerators have also been fabricated, delivering up to three orders-of-magnitude speedups over high-performance CPUs on various DNN benchmarks. * https://github.com/ucb-bar/gemmini <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1911.09925v3-abstract-full').style.display = 'none'; document.getElementById('1911.09925v3-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 9 July, 2021; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 22 November, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> November 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">To appear at the 58th IEEE/ACM Design Automation Conference (DAC), December 2021, San Francisco, CA, USA</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1906.10513">arXiv:1906.10513</a> <span> [<a href="https://arxiv.org/pdf/1906.10513">pdf</a>, <a href="https://arxiv.org/format/1906.10513">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> The Role of Compute in Autonomous Aerial Vehicles </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Boroujerdian%2C+B">Behzad Boroujerdian</a>, <a href="/search/cs?searchtype=author&query=Genc%2C+H">Hasan Genc</a>, <a href="/search/cs?searchtype=author&query=Krishnan%2C+S">Srivatsan Krishnan</a>, <a href="/search/cs?searchtype=author&query=Duisterhof%2C+B+P">Bardienus Pieter Duisterhof</a>, <a href="/search/cs?searchtype=author&query=Plancher%2C+B">Brian Plancher</a>, <a href="/search/cs?searchtype=author&query=Mansoorshahi%2C+K">Kayvan Mansoorshahi</a>, <a href="/search/cs?searchtype=author&query=Almeida%2C+M">Marcelino Almeida</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+W">Wenzhi Cui</a>, <a href="/search/cs?searchtype=author&query=Faust%2C+A">Aleksandra Faust</a>, <a href="/search/cs?searchtype=author&query=Reddi%2C+V+J">Vijay Janapa Reddi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1906.10513v1-abstract-short" style="display: inline;"> Autonomous-mobile cyber-physical machines are part of our future. Specifically, unmanned-aerial-vehicles have seen a resurgence in activity with use-cases such as package delivery. These systems face many challenges such as their low-endurance caused by limited onboard-energy, hence, improving the mission-time and energy are of importance. Such improvements traditionally are delivered through bett… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.10513v1-abstract-full').style.display = 'inline'; document.getElementById('1906.10513v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1906.10513v1-abstract-full" style="display: none;"> Autonomous-mobile cyber-physical machines are part of our future. Specifically, unmanned-aerial-vehicles have seen a resurgence in activity with use-cases such as package delivery. These systems face many challenges such as their low-endurance caused by limited onboard-energy, hence, improving the mission-time and energy are of importance. Such improvements traditionally are delivered through better algorithms. But our premise is that more powerful and efficient onboard-compute should also address the problem. This paper investigates how the compute subsystem, in a cyber-physical mobile machine, such as a Micro Aerial Vehicle, impacts mission-time and energy. Specifically, we pose the question as what is the role of computing for cyber-physical mobile robots? We show that compute and motion are tightly intertwined, hence a close examination of cyber and physical processes and their impact on one another is necessary. We show different impact paths through which compute impacts mission-metrics and examine them using analytical models, simulation, and end-to-end benchmarking. To enable similar studies, we open sourced MAVBench, our tool-set consisting of a closed-loop simulator and a benchmark suite. Our investigations show cyber-physical co-design, a methodology where robot's cyber and physical processes/quantities are developed with one another consideration, similar to hardware-software co-design, is necessary for optimal robot design. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1906.10513v1-abstract-full').style.display = 'none'; document.getElementById('1906.10513v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 23 June, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> June 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">arXiv admin note: substantial text overlap with arXiv:1905.06388</span> </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1905.06388">arXiv:1905.06388</a> <span> [<a href="https://arxiv.org/pdf/1905.06388">pdf</a>, <a href="https://arxiv.org/format/1905.06388">other</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Robotics">cs.RO</span> </div> </div> <p class="title is-5 mathjax"> MAVBench: Micro Aerial Vehicle Benchmarking </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Boroujerdian%2C+B">Behzad Boroujerdian</a>, <a href="/search/cs?searchtype=author&query=Genc%2C+H">Hasan Genc</a>, <a href="/search/cs?searchtype=author&query=Krishnan%2C+S">Srivatsan Krishnan</a>, <a href="/search/cs?searchtype=author&query=Cui%2C+W">Wenzhi Cui</a>, <a href="/search/cs?searchtype=author&query=Faust%2C+A">Aleksandra Faust</a>, <a href="/search/cs?searchtype=author&query=Reddi%2C+V+J">Vijay Janapa Reddi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1905.06388v2-abstract-short" style="display: inline;"> Unmanned Aerial Vehicles (UAVs) are getting closer to becoming ubiquitous in everyday life. Among them, Micro Aerial Vehicles (MAVs) have seen an outburst of attention recently, specifically in the area with a demand for autonomy. A key challenge standing in the way of making MAVs autonomous is that researchers lack the comprehensive understanding of how performance, power, and computational bottl… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.06388v2-abstract-full').style.display = 'inline'; document.getElementById('1905.06388v2-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1905.06388v2-abstract-full" style="display: none;"> Unmanned Aerial Vehicles (UAVs) are getting closer to becoming ubiquitous in everyday life. Among them, Micro Aerial Vehicles (MAVs) have seen an outburst of attention recently, specifically in the area with a demand for autonomy. A key challenge standing in the way of making MAVs autonomous is that researchers lack the comprehensive understanding of how performance, power, and computational bottlenecks affect MAV applications. MAVs must operate under a stringent power budget, which severely limits their flight endurance time. As such, there is a need for new tools, benchmarks, and methodologies to foster the systematic development of autonomous MAVs. In this paper, we introduce the `MAVBench' framework which consists of a closed-loop simulator and an end-to-end application benchmark suite. A closed-loop simulation platform is needed to probe and understand the intra-system (application data flow) and inter-system (system and environment) interactions in MAV applications to pinpoint bottlenecks and identify opportunities for hardware and software co-design and optimization. In addition to the simulator, MAVBench provides a benchmark suite, the first of its kind, consisting of a variety of MAV applications designed to enable computer architects to perform characterization and develop future aerial computing systems. Using our open source, end-to-end experimental platform, we uncover a hidden, and thus far unexpected compute to total system energy relationship in MAVs. Furthermore, we explore the role of compute by presenting three case studies targeting performance, energy and reliability. These studies confirm that an efficient system design can improve MAV's battery consumption by up to 1.8X. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1905.06388v2-abstract-full').style.display = 'none'; document.getElementById('1905.06388v2-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 31 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">v1</span> submitted 15 May, 2019; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> May 2019. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> 2018 51st Annual IEEE/ACM International Symposium on Microarchitecture (MICRO) </p> </li> <li class="arxiv-result"> <div class="is-marginless"> <p class="list-title is-inline-block"><a href="https://arxiv.org/abs/1810.02010">arXiv:1810.02010</a> <span> [<a href="https://arxiv.org/pdf/1810.02010">pdf</a>] </span> </p> <div class="tags is-inline-block"> <span class="tag is-small is-link tooltip is-tooltip-top" data-tooltip="Computer Vision and Pattern Recognition">cs.CV</span> </div> <div class="is-inline-block" style="margin-left: 0.5rem"> <div class="tags has-addons"> <span class="tag is-dark is-size-7">doi</span> <span class="tag is-light is-size-7"><a class="" href="https://doi.org/10.1109/MM.2018.112130335">10.1109/MM.2018.112130335 <i class="fa fa-external-link" aria-hidden="true"></i></a></span> </div> </div> </div> <p class="title is-5 mathjax"> Domain Specific Approximation for Object Detection </p> <p class="authors"> <span class="search-hit">Authors:</span> <a href="/search/cs?searchtype=author&query=Chin%2C+T">Ting-Wu Chin</a>, <a href="/search/cs?searchtype=author&query=Yu%2C+C">Chia-Lin Yu</a>, <a href="/search/cs?searchtype=author&query=Halpern%2C+M">Matthew Halpern</a>, <a href="/search/cs?searchtype=author&query=Genc%2C+H">Hasan Genc</a>, <a href="/search/cs?searchtype=author&query=Tsao%2C+S">Shiao-Li Tsao</a>, <a href="/search/cs?searchtype=author&query=Reddi%2C+V+J">Vijay Janapa Reddi</a> </p> <p class="abstract mathjax"> <span class="has-text-black-bis has-text-weight-semibold">Abstract</span>: <span class="abstract-short has-text-grey-dark mathjax" id="1810.02010v1-abstract-short" style="display: inline;"> There is growing interest in object detection in advanced driver assistance systems and autonomous robots and vehicles. To enable such innovative systems, we need faster object detection. In this work, we investigate the trade-off between accuracy and speed with domain-specific approximations, i.e. category-aware image size scaling and proposals scaling, for two state-of-the-art deep learning-base… <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.02010v1-abstract-full').style.display = 'inline'; document.getElementById('1810.02010v1-abstract-short').style.display = 'none';">▽ More</a> </span> <span class="abstract-full has-text-grey-dark mathjax" id="1810.02010v1-abstract-full" style="display: none;"> There is growing interest in object detection in advanced driver assistance systems and autonomous robots and vehicles. To enable such innovative systems, we need faster object detection. In this work, we investigate the trade-off between accuracy and speed with domain-specific approximations, i.e. category-aware image size scaling and proposals scaling, for two state-of-the-art deep learning-based object detection meta-architectures. We study the effectiveness of applying approximation both statically and dynamically to understand the potential and the applicability of them. By conducting experiments on the ImageNet VID dataset, we show that domain-specific approximation has great potential to improve the speed of the system without deteriorating the accuracy of object detectors, i.e. up to 7.5x speedup for dynamic domain-specific approximation. To this end, we present our insights toward harvesting domain-specific approximation as well as devise a proof-of-concept runtime, AutoFocus, that exploits dynamic domain-specific approximation. <a class="is-size-7" style="white-space: nowrap;" onclick="document.getElementById('1810.02010v1-abstract-full').style.display = 'none'; document.getElementById('1810.02010v1-abstract-short').style.display = 'inline';">△ Less</a> </span> </p> <p class="is-size-7"><span class="has-text-black-bis has-text-weight-semibold">Submitted</span> 3 October, 2018; <span class="has-text-black-bis has-text-weight-semibold">originally announced</span> October 2018. </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Comments:</span> <span class="has-text-grey-dark mathjax">6 pages, 6 figures. Published in IEEE Micro, vol. 38, no. 1, pp. 31-40, January/February 2018</span> </p> <p class="comments is-size-7"> <span class="has-text-black-bis has-text-weight-semibold">Journal ref:</span> T. Chin, C. Yu, M. Halpern, H. Genc, S. Tsao and V. J. Reddi, "Domain-Specific Approximation for Object Detection," in IEEE Micro, vol. 38, no. 1, pp. 31-40, January/February 2018 </p> </li> </ol> <div class="is-hidden-tablet"> <!-- feedback for mobile only --> <span class="help" style="display: inline-block;"><a href="https://github.com/arXiv/arxiv-search/releases">Search v0.5.6 released 2020-02-24</a> </span> </div> </div> </main> <footer> <div class="columns is-desktop" role="navigation" aria-label="Secondary"> <!-- MetaColumn 1 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/about">About</a></li> <li><a href="https://info.arxiv.org/help">Help</a></li> </ul> </div> <div class="column"> <ul class="nav-spaced"> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>contact arXiv</title><desc>Click here to contact arXiv</desc><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg> <a href="https://info.arxiv.org/help/contact.html"> Contact</a> </li> <li> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><title>subscribe to arXiv mailings</title><desc>Click here to subscribe</desc><path d="M476 3.2L12.5 270.6c-18.1 10.4-15.8 35.6 2.2 43.2L121 358.4l287.3-253.2c5.5-4.9 13.3 2.6 8.6 8.3L176 407v80.5c0 23.6 28.5 32.9 42.5 15.8L282 426l124.6 52.2c14.2 6 30.4-2.9 33-18.2l72-432C515 7.8 493.3-6.8 476 3.2z"/></svg> <a href="https://info.arxiv.org/help/subscribe"> Subscribe</a> </li> </ul> </div> </div> </div> <!-- end MetaColumn 1 --> <!-- MetaColumn 2 --> <div class="column"> <div class="columns"> <div class="column"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/license/index.html">Copyright</a></li> <li><a href="https://info.arxiv.org/help/policies/privacy_policy.html">Privacy Policy</a></li> </ul> </div> <div class="column sorry-app-links"> <ul class="nav-spaced"> <li><a href="https://info.arxiv.org/help/web_accessibility.html">Web Accessibility Assistance</a></li> <li> <p class="help"> <a class="a11y-main-link" href="https://status.arxiv.org" target="_blank">arXiv Operational Status <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 256 512" class="icon filter-dark_grey" role="presentation"><path d="M224.3 273l-136 136c-9.4 9.4-24.6 9.4-33.9 0l-22.6-22.6c-9.4-9.4-9.4-24.6 0-33.9l96.4-96.4-96.4-96.4c-9.4-9.4-9.4-24.6 0-33.9L54.3 103c9.4-9.4 24.6-9.4 33.9 0l136 136c9.5 9.4 9.5 24.6.1 34z"/></svg></a><br> Get status notifications via <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/email/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" class="icon filter-black" role="presentation"><path d="M502.3 190.8c3.9-3.1 9.7-.2 9.7 4.7V400c0 26.5-21.5 48-48 48H48c-26.5 0-48-21.5-48-48V195.6c0-5 5.7-7.8 9.7-4.7 22.4 17.4 52.1 39.5 154.1 113.6 21.1 15.4 56.7 47.8 92.2 47.6 35.7.3 72-32.8 92.3-47.6 102-74.1 131.6-96.3 154-113.7zM256 320c23.2.4 56.6-29.2 73.4-41.4 132.7-96.3 142.8-104.7 173.4-128.7 5.8-4.5 9.2-11.5 9.2-18.9v-19c0-26.5-21.5-48-48-48H48C21.5 64 0 85.5 0 112v19c0 7.4 3.4 14.3 9.2 18.9 30.6 23.9 40.7 32.4 173.4 128.7 16.8 12.2 50.2 41.8 73.4 41.4z"/></svg>email</a> or <a class="is-link" href="https://subscribe.sorryapp.com/24846f03/slack/new" target="_blank"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512" class="icon filter-black" role="presentation"><path d="M94.12 315.1c0 25.9-21.16 47.06-47.06 47.06S0 341 0 315.1c0-25.9 21.16-47.06 47.06-47.06h47.06v47.06zm23.72 0c0-25.9 21.16-47.06 47.06-47.06s47.06 21.16 47.06 47.06v117.84c0 25.9-21.16 47.06-47.06 47.06s-47.06-21.16-47.06-47.06V315.1zm47.06-188.98c-25.9 0-47.06-21.16-47.06-47.06S139 32 164.9 32s47.06 21.16 47.06 47.06v47.06H164.9zm0 23.72c25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06H47.06C21.16 243.96 0 222.8 0 196.9s21.16-47.06 47.06-47.06H164.9zm188.98 47.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06s-21.16 47.06-47.06 47.06h-47.06V196.9zm-23.72 0c0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06V79.06c0-25.9 21.16-47.06 47.06-47.06 25.9 0 47.06 21.16 47.06 47.06V196.9zM283.1 385.88c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06-25.9 0-47.06-21.16-47.06-47.06v-47.06h47.06zm0-23.72c-25.9 0-47.06-21.16-47.06-47.06 0-25.9 21.16-47.06 47.06-47.06h117.84c25.9 0 47.06 21.16 47.06 47.06 0 25.9-21.16 47.06-47.06 47.06H283.1z"/></svg>slack</a> </p> </li> </ul> </div> </div> </div> <!-- end MetaColumn 2 --> </div> </footer> <script src="https://static.arxiv.org/static/base/1.0.0a5/js/member_acknowledgement.js"></script> </body> </html>