CUDA - Wikipedia

[o]" accesskey="o"><span class="vector-icon mw-ui-icon-logIn mw-ui-icon-wikimedia-logIn"></span> <span>Log in</span></a></li> </ul> </div> </div> <div id="p-user-menu-anon-editor" class="vector-menu mw-portlet mw-portlet-user-menu-anon-editor" > <div class="vector-menu-heading"> Pages for logged out editors <a href="/wiki/Help:Introduction" aria-label="Learn more about editing"><span>learn more</span></a> </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="pt-anoncontribs" class="mw-list-item"><a href="/wiki/Special:MyContributions" title="A list of edits made from this IP address [y]" accesskey="y"><span>Contributions</span></a></li><li id="pt-anontalk" class="mw-list-item"><a href="/wiki/Special:MyTalk" title="Discussion about edits from this IP address [n]" accesskey="n"><span>Talk</span></a></li> </ul> </div> </div> </div> </div> </nav> </div> </header> </div> <div class="mw-page-container"> <div class="mw-page-container-inner"> <div class="vector-sitenotice-container"> <div id="siteNotice"><!-- CentralNotice --></div> </div> <div class="vector-column-start"> <div class="vector-main-menu-container"> <div id="mw-navigation"> <nav id="mw-panel" class="vector-main-menu-landmark" aria-label="Site"> <div id="vector-main-menu-pinned-container" class="vector-pinned-container"> </div> </nav> </div> </div> <div class="vector-sticky-pinned-container"> <nav id="mw-panel-toc" aria-label="Contents" data-event-name="ui.sidebar-toc" class="mw-table-of-contents-container vector-toc-landmark"> <div id="vector-toc-pinned-container" class="vector-pinned-container"> <div id="vector-toc" class="vector-toc vector-pinnable-element"> <div class="vector-pinnable-header vector-toc-pinnable-header vector-pinnable-header-pinned" data-feature-name="toc-pinned" data-pinnable-element-id="vector-toc" > <h2 class="vector-pinnable-header-label">Contents</h2> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-toc.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-toc.unpin">hide</button> </div> <ul class="vector-toc-contents" id="mw-panel-toc-list"> <li id="toc-mw-content-text" class="vector-toc-list-item vector-toc-level-1"> <a href="#" class="vector-toc-link"> <div class="vector-toc-text">(Top)</div> </a> </li> <li id="toc-Background" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Background"> <div class="vector-toc-text"> <span class="vector-toc-numb">1</span> <span>Background</span> </div> </a> <ul id="toc-Background-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Ontology" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Ontology"> <div class="vector-toc-text"> <span class="vector-toc-numb">2</span> <span>Ontology</span> </div> </a> <ul id="toc-Ontology-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Programming_abilities" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Programming_abilities"> <div class="vector-toc-text"> <span class="vector-toc-numb">3</span> <span>Programming abilities</span> </div> </a> <ul id="toc-Programming_abilities-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Advantages" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a 
class="vector-toc-link" href="#Advantages"> <div class="vector-toc-text"> <span class="vector-toc-numb">4</span> <span>Advantages</span> </div> </a> <ul id="toc-Advantages-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Limitations" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Limitations"> <div class="vector-toc-text"> <span class="vector-toc-numb">5</span> <span>Limitations</span> </div> </a> <ul id="toc-Limitations-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Example" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Example"> <div class="vector-toc-text"> <span class="vector-toc-numb">6</span> <span>Example</span> </div> </a> <ul id="toc-Example-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-GPUs_supported" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#GPUs_supported"> <div class="vector-toc-text"> <span class="vector-toc-numb">7</span> <span>GPUs supported</span> </div> </a> <ul id="toc-GPUs_supported-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Version_features_and_specifications" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Version_features_and_specifications"> <div class="vector-toc-text"> <span class="vector-toc-numb">8</span> <span>Version features and specifications</span> </div> </a> <button aria-controls="toc-Version_features_and_specifications-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Version features and specifications subsection</span> </button> <ul id="toc-Version_features_and_specifications-sublist" class="vector-toc-list"> <li id="toc-Data_types" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Data_types"> <div class="vector-toc-text"> <span class="vector-toc-numb">8.1</span> <span>Data types</span> </div> </a> <ul id="toc-Data_types-sublist" class="vector-toc-list"> <li id="toc-Floating-point_types" class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#Floating-point_types"> <div class="vector-toc-text"> <span class="vector-toc-numb">8.1.1</span> <span>Floating-point types</span> </div> </a> <ul id="toc-Floating-point_types-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Version_support" class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#Version_support"> <div class="vector-toc-text"> <span class="vector-toc-numb">8.1.2</span> <span>Version support</span> </div> </a> <ul id="toc-Version_support-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Tensor_cores" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Tensor_cores"> <div class="vector-toc-text"> <span class="vector-toc-numb">8.2</span> <span>Tensor cores</span> </div> </a> <ul id="toc-Tensor_cores-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Technical_specification" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Technical_specification"> <div class="vector-toc-text"> <span class="vector-toc-numb">8.3</span> <span>Technical specification</span> </div> </a> <ul id="toc-Technical_specification-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Multiprocessor_architecture" 
class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Multiprocessor_architecture"> <div class="vector-toc-text"> <span class="vector-toc-numb">8.4</span> <span>Multiprocessor architecture</span> </div> </a> <ul id="toc-Multiprocessor_architecture-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-Current_and_future_usages_of_CUDA_architecture" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Current_and_future_usages_of_CUDA_architecture"> <div class="vector-toc-text"> <span class="vector-toc-numb">9</span> <span>Current and future usages of CUDA architecture</span> </div> </a> <ul id="toc-Current_and_future_usages_of_CUDA_architecture-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Comparison_with_competitors" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Comparison_with_competitors"> <div class="vector-toc-text"> <span class="vector-toc-numb">10</span> <span>Comparison with competitors</span> </div> </a> <button aria-controls="toc-Comparison_with_competitors-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle"> <span class="vector-icon mw-ui-icon-wikimedia-expand"></span> <span>Toggle Comparison with competitors subsection</span> </button> <ul id="toc-Comparison_with_competitors-sublist" class="vector-toc-list"> <li id="toc-Intel_OneAPI" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#Intel_OneAPI"> <div class="vector-toc-text"> <span class="vector-toc-numb">10.1</span> <span>Intel OneAPI</span> </div> </a> <ul id="toc-Intel_OneAPI-sublist" class="vector-toc-list"> <li id="toc-Unified_Acceleration_Foundation_(UXL)" class="vector-toc-list-item vector-toc-level-3"> <a class="vector-toc-link" href="#Unified_Acceleration_Foundation_(UXL)"> <div class="vector-toc-text"> <span class="vector-toc-numb">10.1.1</span> <span>Unified Acceleration Foundation (UXL)</span> </div> </a> <ul id="toc-Unified_Acceleration_Foundation_(UXL)-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-AMD_ROCm" class="vector-toc-list-item vector-toc-level-2"> <a class="vector-toc-link" href="#AMD_ROCm"> <div class="vector-toc-text"> <span class="vector-toc-numb">10.2</span> <span>AMD ROCm</span> </div> </a> <ul id="toc-AMD_ROCm-sublist" class="vector-toc-list"> </ul> </li> </ul> </li> <li id="toc-See_also" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#See_also"> <div class="vector-toc-text"> <span class="vector-toc-numb">11</span> <span>See also</span> </div> </a> <ul id="toc-See_also-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-References" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#References"> <div class="vector-toc-text"> <span class="vector-toc-numb">12</span> <span>References</span> </div> </a> <ul id="toc-References-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-Further_reading" class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#Further_reading"> <div class="vector-toc-text"> <span class="vector-toc-numb">13</span> <span>Further reading</span> </div> </a> <ul id="toc-Further_reading-sublist" class="vector-toc-list"> </ul> </li> <li id="toc-External_links" class="vector-toc-list-item vector-toc-level-1 
vector-toc-list-item-expanded"> <a class="vector-toc-link" href="#External_links"> <div class="vector-toc-text"> <span class="vector-toc-numb">14</span> <span>External links</span> </div> </a> <ul id="toc-External_links-sublist" class="vector-toc-list"> </ul> </li> </ul> </div> </div> </nav> </div> </div> <div class="mw-content-container"> <main id="content" class="mw-body"> <header class="mw-body-header vector-page-titlebar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-page-titlebar-toc" class="vector-dropdown vector-page-titlebar-toc vector-button-flush-left" title="Table of Contents" > <input type="checkbox" id="vector-page-titlebar-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-page-titlebar-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-page-titlebar-toc-label" for="vector-page-titlebar-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-page-titlebar-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">CUDA</span></h1> <div id="p-lang-btn" class="vector-dropdown mw-portlet mw-portlet-lang" > <input type="checkbox" id="p-lang-btn-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-p-lang-btn" class="vector-dropdown-checkbox mw-interlanguage-selector" aria-label="Go to an article in another language. 
class="vector-menu-heading"> Print/export </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&amp;page=CUDA&amp;action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=CUDA&amp;printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li> </ul> </div> </div> <div id="p-wikibase-otherprojects" class="vector-menu mw-portlet mw-portlet-wikibase-otherprojects" > <div class="vector-menu-heading"> In other projects </div> <div class="vector-menu-content"> <ul class="vector-menu-content-list"> <li class="wb-otherproject-link wb-otherproject-commons mw-list-item"><a href="https://commons.wikimedia.org/wiki/Category:CUDA" hreflang="en"><span>Wikimedia Commons</span></a></li><li id="t-wikibase" class="wb-otherproject-link wb-otherproject-wikibase-dataitem mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q477690" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li> </ul> </div> </div> </div> </div> </div> </div> </nav> </div> </div> </div> <div class="vector-column-end"> <div class="vector-sticky-pinned-container"> <nav class="vector-page-tools-landmark" aria-label="Page tools"> <div id="vector-page-tools-pinned-container" class="vector-pinned-container"> </div> </nav> <nav class="vector-appearance-landmark" aria-label="Appearance"> <div id="vector-appearance-pinned-container" class="vector-pinned-container"> <div id="vector-appearance" class="vector-appearance vector-pinnable-element"> <div class="vector-pinnable-header vector-appearance-pinnable-header vector-pinnable-header-pinned" data-feature-name="appearance-pinned" data-pinnable-element-id="vector-appearance" data-pinned-container-id="vector-appearance-pinned-container" data-unpinned-container-id="vector-appearance-unpinned-container" > <div class="vector-pinnable-header-label">Appearance</div> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-appearance.pin">move to sidebar</button> <button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-appearance.unpin">hide</button> </div> </div> </div> </nav> </div> </div> <div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container> <div class="vector-body-before-content"> <div class="mw-indicators"> </div> <div id="siteSub" class="noprint">From Wikipedia, the free encyclopedia</div> </div> <div id="contentSub"><div id="mw-content-subtitle"></div></div> <div id="mw-content-text" class="mw-body-content"><div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><div class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">Parallel computing platform and programming model</div> <style data-mw-deduplicate="TemplateStyles:r1251242444">.mw-parser-output .ambox{border:1px solid #a2a9b1;border-left:10px solid #36c;background-color:#fbfbfb;box-sizing:border-box}.mw-parser-output .ambox+link+.ambox,.mw-parser-output .ambox+link+style+.ambox,.mw-parser-output .ambox+link+link+.ambox,.mw-parser-output .ambox+.mw-empty-elt+link+.ambox,.mw-parser-output .ambox+.mw-empty-elt+link+style+.ambox,.mw-parser-output 
*For other uses, see Cuda (disambiguation).*

| CUDA | |
|---|---|
| Developer(s) | Nvidia |
| Initial release | February 16, 2007[1] |
| Stable release | 12.8 / January 2025 |
| Operating system | Windows, Linux |
| Platform | Supported GPUs |
| Type | GPGPU |
| License | Proprietary |
| Website | developer.nvidia.com/cuda-zone |

In computing, CUDA (Compute Unified Device Architecture) is a proprietary[2] parallel computing platform and application programming interface (API) that allows software to use certain types of graphics processing units (GPUs) for accelerated general-purpose processing, an approach called general-purpose computing on GPUs.
CUDA was created by Nvidia in 2006.[3] When it was first introduced, the name was an acronym for Compute Unified Device Architecture,[4] but Nvidia later dropped the common use of the acronym and now rarely expands it.[5]

CUDA is a software layer that gives direct access to the GPU's virtual instruction set and parallel computational elements for the execution of compute kernels.[6] In addition to drivers and runtime kernels, the CUDA platform includes compilers, libraries and developer tools to help programmers accelerate their applications.

CUDA is designed to work with programming languages such as C, C++, Fortran, Python and Julia. This accessibility makes it easier for specialists in parallel programming to use GPU resources, in contrast to prior APIs like Direct3D and OpenGL, which require advanced skills in graphics programming.[7] CUDA-powered GPUs also support programming frameworks such as OpenMP, OpenACC and OpenCL.[8][6]
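As a minimal illustration of what a compute kernel is, the sketch below (the kernel name `hello` and the launch configuration are arbitrary choices for this example, not taken from any official sample) defines a `__global__` function and launches it over a small grid of threads:

```cuda
#include <cstdio>
#include <cuda_runtime.h>

// A compute kernel: a __global__ function that the host launches and the GPU
// executes in parallel, once per thread.
__global__ void hello()
{
    printf("Hello from thread %d of block %d\n", threadIdx.x, blockIdx.x);
}

int main()
{
    hello<<<2, 4>>>();           // grid of 2 blocks, 4 threads per block
    cudaDeviceSynchronize();     // wait for the GPU before the program exits
    return 0;
}
```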
## Background

*Further information: Graphics processing unit*

The graphics processing unit (GPU), as a specialized computer processor, addresses the demands of real-time, high-resolution, compute-intensive 3D graphics tasks. By 2012, GPUs had evolved into highly parallel multi-core systems allowing efficient manipulation of large blocks of data. This design is more effective than general-purpose central processing units (CPUs) for algorithms in situations where processing of large blocks of data is done in parallel, such as:

- cryptographic hash functions
- machine learning
- molecular dynamics simulations
- physics engines

Ian Buck, while at Stanford in 2000, created an 8K gaming rig using 32 GeForce cards, then obtained a DARPA grant to perform general-purpose parallel programming on GPUs. He then joined Nvidia, where since 2004 he has overseen CUDA development. In pushing for CUDA, Jensen Huang aimed for Nvidia GPUs to become general-purpose hardware for scientific computing. CUDA was released in 2007.
Around 2015, the focus of CUDA changed to neural networks.[9]

## Ontology

The following table offers a non-exact description of the ontology of the CUDA framework.

| Memory (hardware) | Memory (code, or variable scoping) | Computation (hardware) | Computation (code syntax) | Computation (code semantics) |
|---|---|---|---|---|
| RAM | non-CUDA variables | host | program | one routine call |
| VRAM, GPU L2 cache | global, const, texture | device | grid | simultaneous call of the same subroutine on many processors |
| GPU L1 cache | local, shared | SM ("streaming multiprocessor") | block | individual subroutine call |
| | | warp = 32 threads | | SIMD instructions |
| GPU L0 cache, register | | thread (aka "SP", "streaming processor", "CUDA core", but these names are now deprecated) | | analogous to individual scalar ops within a vector op |
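The hierarchy in the table can be made concrete with a small kernel. The sketch below is illustrative only (the kernel name `scale_rows`, the scale factor and the array size are invented for this example): a grid of blocks of threads reads and writes global (device) memory, a `__shared__` array is visible to the threads of one block running on a single SM, and ordinary local variables such as the index live in per-thread registers.

```cuda
#include <cuda_runtime.h>
#include <cstdio>

__device__ float d_scale = 2.0f;                   // device ("global") scope: lives in VRAM

__global__ void scale_rows(const float* in, float* out, int n)
{
    __shared__ float tile[256];                    // shared scope: one copy per block, on-chip
    int i = blockIdx.x * blockDim.x + threadIdx.x; // local variable: held in per-thread registers

    tile[threadIdx.x] = (i < n) ? in[i] : 0.0f;    // each thread of the block fills one slot
    __syncthreads();                               // block-wide barrier on the shared data

    if (i < n)
        out[i] = tile[threadIdx.x] * d_scale;
}

int main()
{
    const int n = 1024;
    float *in, *out;
    cudaMallocManaged(&in,  n * sizeof(float));    // unified memory, visible to host and device
    cudaMallocManaged(&out, n * sizeof(float));
    for (int i = 0; i < n; ++i) in[i] = float(i);

    // A grid of 4 blocks, each with 256 threads; each block runs on one SM in 32-thread warps.
    scale_rows<<<n / 256, 256>>>(in, out, n);
    cudaDeviceSynchronize();

    printf("%f\n", out[10]);                       // prints 20.000000
    cudaFree(in);
    cudaFree(out);
    return 0;
}
```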
"SP", "streaming processor", "cuda core", but these names are now deprecated) </td> <td> </td> <td>analogous to individual scalar ops within a vector op </td></tr></tbody></table> <div class="mw-heading mw-heading2"><h2 id="Programming_abilities">Programming abilities</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=3" title="Edit section: Programming abilities"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <figure class="mw-halign-right" typeof="mw:File/Thumb"><a href="/wiki/File:CUDA_processing_flow_(En).PNG" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/5/59/CUDA_processing_flow_%28En%29.PNG/300px-CUDA_processing_flow_%28En%29.PNG" decoding="async" width="300" height="290" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/5/59/CUDA_processing_flow_%28En%29.PNG/450px-CUDA_processing_flow_%28En%29.PNG 1.5x, //upload.wikimedia.org/wikipedia/commons/5/59/CUDA_processing_flow_%28En%29.PNG 2x" data-file-width="600" data-file-height="580" /></a><figcaption><b>Example of CUDA processing flow</b> <div><ol><li>Copy data from main memory to GPU memory</li><li>CPU initiates the GPU <a href="/wiki/Compute_kernel" title="Compute kernel">compute kernel</a></li><li>GPU's CUDA cores execute the kernel in parallel</li><li>Copy the resulting data from GPU memory to main memory</li></ol></div></figcaption></figure> <p>The CUDA platform is accessible to software developers through CUDA-accelerated libraries, <a href="/wiki/Directive_(programming)" title="Directive (programming)">compiler directives</a> such as <a href="/wiki/OpenACC" title="OpenACC">OpenACC</a>, and extensions to industry-standard programming languages including <a href="/wiki/C_(programming_language)" title="C (programming language)">C</a>, <a href="/wiki/C%2B%2B" title="C++">C++</a>, <a href="/wiki/Fortran" title="Fortran">Fortran</a> and <a href="/wiki/Python_(programming_language)" title="Python (programming language)">Python</a>. C/C++ programmers can use 'CUDA C/C++', compiled to <a href="/wiki/Parallel_Thread_Execution" title="Parallel Thread Execution">PTX</a> with <a href="/wiki/NVIDIA_CUDA_Compiler" class="mw-redirect" title="NVIDIA CUDA Compiler">nvcc</a>, Nvidia's <a href="/wiki/LLVM" title="LLVM">LLVM</a>-based C/C++ compiler, or by clang itself.<sup id="cite_ref-10" class="reference"><a href="#cite_note-10"><span class="cite-bracket">&#91;</span>10<span class="cite-bracket">&#93;</span></a></sup> Fortran programmers can use 'CUDA Fortran', compiled with the PGI CUDA Fortran compiler from <a href="/wiki/The_Portland_Group" title="The Portland Group">The Portland Group</a>.<sup class="noprint Inline-Template" style="white-space:nowrap;">&#91;<i><a href="/wiki/Wikipedia:Manual_of_Style/Dates_and_numbers#Chronological_items" title="Wikipedia:Manual of Style/Dates and numbers"><span title="PGI Compilers &amp; Tools have evolved into the NVIDIA HPC SDK. The current Fortran compiler is called nvfortran. (December 2022)">needs update</span></a></i>&#93;</sup> Python programmers can use the cuNumeric library to accelerate applications on Nvidia GPUs. 
In addition to libraries, compiler directives, CUDA C/C++ and CUDA Fortran, the CUDA platform supports other computational interfaces, including the Khronos Group's OpenCL,[11] Microsoft's DirectCompute, OpenGL Compute Shader and C++ AMP.[12] Third-party wrappers are also available for Python, Perl, Fortran, Java, Ruby, Lua, Common Lisp, Haskell, R, MATLAB, IDL and Julia, and there is native support in Mathematica.

In the computer game industry, GPUs are used for graphics rendering and for game physics calculations (physical effects such as debris, smoke, fire, fluids); examples include PhysX and Bullet.
CUDA has also been used to accelerate non-graphical applications in computational biology, cryptography and other fields by an order of magnitude or more.[13][14][15][16][17]

CUDA provides both a low-level API (the CUDA Driver API, non-single-source) and a higher-level API (the CUDA Runtime API, single-source). The initial CUDA SDK was made public on 15 February 2007, for Microsoft Windows and Linux. Mac OS X support was later added in version 2.0,[18] which superseded the beta released February 14, 2008.[19] CUDA works with all Nvidia GPUs from the G8x series onwards, including GeForce, Quadro and the Tesla line. CUDA is compatible with most standard operating systems.
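The practical difference between the two APIs is visible in how a kernel is launched. With the Runtime API, kernels are written in the same source file and launched with the `<<<...>>>` syntax, as in the examples later in this article. The sketch below shows a comparable launch through the low-level Driver API; it assumes a kernel named `saxpy` (declared `extern "C"`) that has been compiled separately into a module file `saxpy.ptx`, both of which are placeholders for this illustration, and it omits error checking:

```cuda
// Host-only program using the low-level Driver API. The kernel is assumed to be
// compiled separately (e.g. "nvcc -ptx saxpy.cu") into saxpy.ptx and declared
// extern "C" so that its symbol name is simply "saxpy".
#include <cuda.h>
#include <vector>

int main()
{
    cuInit(0);
    CUdevice  dev;  cuDeviceGet(&dev, 0);
    CUcontext ctx;  cuCtxCreate(&ctx, 0, dev);

    CUmodule   mod; cuModuleLoad(&mod, "saxpy.ptx");         // load precompiled PTX
    CUfunction fn;  cuModuleGetFunction(&fn, mod, "saxpy");

    const int n = 1024;
    std::vector<float> x(n, 1.0f), y(n, 2.0f);
    CUdeviceptr dx, dy;
    cuMemAlloc(&dx, n * sizeof(float));
    cuMemAlloc(&dy, n * sizeof(float));
    cuMemcpyHtoD(dx, x.data(), n * sizeof(float));
    cuMemcpyHtoD(dy, y.data(), n * sizeof(float));

    float a   = 3.0f;
    int   num = n;
    void* args[] = { &a, &dx, &dy, &num };                    // kernel arguments, passed by address
    cuLaunchKernel(fn, n / 256, 1, 1,                         // grid dimensions
                       256, 1, 1,                             // block dimensions
                       0, nullptr, args, nullptr);            // shared memory, stream, params, extra
    cuCtxSynchronize();

    cuMemcpyDtoH(y.data(), dy, n * sizeof(float));            // y now holds a*x + y

    cuMemFree(dx);
    cuMemFree(dy);
    cuModuleUnload(mod);
    cuCtxDestroy(ctx);
    return 0;
}
```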
CUDA 8.0 comes with the following libraries (for compilation and runtime, in alphabetical order):

- cuBLAS – CUDA Basic Linear Algebra Subroutines library (see the usage sketch after these lists)
- CUDART – CUDA Runtime library
- cuFFT – CUDA Fast Fourier Transform library
- cuRAND – CUDA Random Number Generation library
- cuSOLVER – CUDA based collection of dense and sparse direct solvers
- cuSPARSE – CUDA Sparse Matrix library
- NPP – NVIDIA Performance Primitives library
- nvGRAPH – NVIDIA Graph Analytics library
- NVML – NVIDIA Management Library
- NVRTC – NVIDIA Runtime Compilation library for CUDA C++

CUDA 8.0 comes with these other software components:

- nView – NVIDIA nView Desktop Management Software
- NVWMI – NVIDIA Enterprise Management Toolkit
- GameWorks PhysX – a multi-platform game physics engine

CUDA 9.0–9.2 comes with these other components:

- CUTLASS 1.0 – custom linear algebra algorithms
- NVIDIA Video Decoder was deprecated in CUDA 9.2; it is now available in the NVIDIA Video Codec SDK

CUDA 10 comes with these other components:

- nvJPEG – hybrid (CPU and GPU) JPEG processing

CUDA 11.0–11.8 comes with these other components:[20][21][22][23]

- CUB – one of the newly supported C++ libraries
- MIG – multi-instance GPU support
- nvJPEG2000 – JPEG 2000 encoder and decoder
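As an illustration of how one of these CUDA-accelerated libraries is called from host code, the following sketch uses cuBLAS to compute a single-precision AXPY (y = a·x + y); the vector length and values are arbitrary and error checking is omitted:

```cuda
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <cstdio>

int main()
{
    const int n = 4;
    float x[n] = {1, 2, 3, 4};
    float y[n] = {10, 20, 30, 40};

    float *dx, *dy;
    cudaMalloc(&dx, n * sizeof(float));
    cudaMalloc(&dy, n * sizeof(float));
    cudaMemcpy(dx, x, n * sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(dy, y, n * sizeof(float), cudaMemcpyHostToDevice);

    cublasHandle_t handle;
    cublasCreate(&handle);

    const float alpha = 2.0f;
    cublasSaxpy(handle, n, &alpha, dx, 1, dy, 1);     // y = alpha*x + y, computed on the GPU

    cudaMemcpy(y, dy, n * sizeof(float), cudaMemcpyDeviceToHost);
    printf("%f %f %f %f\n", y[0], y[1], y[2], y[3]);  // 12 24 36 48

    cublasDestroy(handle);
    cudaFree(dx);
    cudaFree(dy);
    return 0;
}
```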
## Advantages

CUDA has several advantages over traditional general-purpose computation on GPUs (GPGPU) using graphics APIs:

- Scattered reads – code can read from arbitrary addresses in memory (see the sketch after this list).
- Unified virtual memory (CUDA 4.0 and above)
- Unified memory (CUDA 6.0 and above)
- Shared memory – CUDA exposes a fast shared memory region that can be shared among threads. This can be used as a user-managed cache, enabling higher bandwidth than is possible using texture lookups.[24]
- Faster downloads and readbacks to and from the GPU
- Full support for integer and bitwise operations, including integer texture lookups
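A minimal sketch of the first and last points above (the lookup table, the reversed index pattern and the kernel name `gather_and_mask` are invented for this illustration): each thread performs a scattered read from a data-dependent address and applies an integer bitwise mask.

```cuda
#include <cuda_runtime.h>
#include <cstdio>

// Each thread reads table[idx[i]] (an arbitrary, data-dependent address)
// and applies an integer bitwise operation, both of which CUDA supports directly.
__global__ void gather_and_mask(const unsigned int* table, const int* idx,
                                unsigned int* out, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        out[i] = table[idx[i]] & 0xFFu;
}

int main()
{
    const int n = 8;
    unsigned int table[n] = {0x100, 0x201, 0x302, 0x403, 0x504, 0x605, 0x706, 0x807};
    int idx[n]            = {7, 6, 5, 4, 3, 2, 1, 0};   // reversed, scattered access pattern
    unsigned int out[n];

    unsigned int *d_table, *d_out;
    int* d_idx;
    cudaMalloc(&d_table, n * sizeof(unsigned int));
    cudaMalloc(&d_idx,   n * sizeof(int));
    cudaMalloc(&d_out,   n * sizeof(unsigned int));
    cudaMemcpy(d_table, table, n * sizeof(unsigned int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_idx,   idx,   n * sizeof(int),          cudaMemcpyHostToDevice);

    gather_and_mask<<<1, n>>>(d_table, d_idx, d_out, n);
    cudaMemcpy(out, d_out, n * sizeof(unsigned int), cudaMemcpyDeviceToHost);

    for (int i = 0; i < n; ++i) printf("%u ", out[i]);   // prints: 7 6 5 4 3 2 1 0
    printf("\n");

    cudaFree(d_table); cudaFree(d_idx); cudaFree(d_out);
    return 0;
}
```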
(May 2016)">citation needed</span></a></i>&#93;</sup></li> <li>C++ <a href="/wiki/Run-time_type_information" title="Run-time type information">run-time type information</a> (RTTI) and C++-style exception handling are only supported in host code, not in device code.</li> <li>In <a href="/wiki/Single-precision_floating-point_format" title="Single-precision floating-point format">single-precision</a> on first generation CUDA compute capability 1.x devices, <a href="/wiki/Denormal_number" class="mw-redirect" title="Denormal number">denormal numbers</a> are unsupported and are instead flushed to zero, and the precision of both the division and square root operations are slightly lower than IEEE 754-compliant single precision math. Devices that support compute capability 2.0 and above support denormal numbers, and the division and square root operations are IEEE 754 compliant by default. However, users can obtain the prior faster gaming-grade math of compute capability 1.x devices if desired by setting compiler flags to disable accurate divisions and accurate square roots, and enable flushing denormal numbers to zero.<sup id="cite_ref-27" class="reference"><a href="#cite_note-27"><span class="cite-bracket">&#91;</span>27<span class="cite-bracket">&#93;</span></a></sup></li> <li>Unlike <a href="/wiki/OpenCL" title="OpenCL">OpenCL</a>, CUDA-enabled GPUs are only available from Nvidia as it is proprietary.<sup id="cite_ref-CUDA_products_28-0" class="reference"><a href="#cite_note-CUDA_products-28"><span class="cite-bracket">&#91;</span>28<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-:0_2-1" class="reference"><a href="#cite_note-:0-2"><span class="cite-bracket">&#91;</span>2<span class="cite-bracket">&#93;</span></a></sup> Attempts to implement CUDA on other GPUs include: <ul><li>Project Coriander: Converts CUDA C++11 source to OpenCL 1.2 C. A fork of CUDA-on-CL intended to run <a href="/wiki/TensorFlow" title="TensorFlow">TensorFlow</a>.<sup id="cite_ref-29" class="reference"><a href="#cite_note-29"><span class="cite-bracket">&#91;</span>29<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-30" class="reference"><a href="#cite_note-30"><span class="cite-bracket">&#91;</span>30<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-31" class="reference"><a href="#cite_note-31"><span class="cite-bracket">&#91;</span>31<span class="cite-bracket">&#93;</span></a></sup></li> <li>CU2CL: Convert CUDA 3.2 C++ to OpenCL C.<sup id="cite_ref-32" class="reference"><a href="#cite_note-32"><span class="cite-bracket">&#91;</span>32<span class="cite-bracket">&#93;</span></a></sup></li> <li><a href="/wiki/GPUOpen" title="GPUOpen">GPUOpen</a> HIP: A thin abstraction layer on top of CUDA and <a href="/wiki/ROCm" title="ROCm">ROCm</a> intended for AMD and Nvidia GPUs. Has a conversion tool for importing CUDA C++ source. Supports CUDA 4.0 plus C++11 and float16.</li> <li>ZLUDA is a drop-in replacement for CUDA on AMD GPUs and formerly Intel GPUs with near-native performance.<sup id="cite_ref-33" class="reference"><a href="#cite_note-33"><span class="cite-bracket">&#91;</span>33<span class="cite-bracket">&#93;</span></a></sup> The developer, Andrzej Janik, was separately contracted by both Intel and AMD to develop the software in 2021 and 2022, respectively. However, neither company decided to release it officially due to the lack of a business use case. 
## Example

This example code in C++ loads a texture from an image into an array on the GPU (the image data, output buffer and dimensions are assumed to be supplied by the caller):

```cuda
// Legacy texture reference API (deprecated in newer CUDA releases).
texture<float, 2, cudaReadModeElementType> tex;

__global__ void kernel(float* odata, int height, int width);  // forward declaration

void foo(const float* image, float* d_data, int width, int height)
{
    cudaArray* cu_array;

    // Allocate array
    cudaChannelFormatDesc description = cudaCreateChannelDesc<float>();
    cudaMallocArray(&cu_array, &description, width, height);

    // Copy image data to array (offsets 0, 0 within the array)
    cudaMemcpyToArray(cu_array, 0, 0, image, width * height * sizeof(float), cudaMemcpyHostToDevice);

    // Set texture parameters (default)
    tex.addressMode[0] = cudaAddressModeClamp;
    tex.addressMode[1] = cudaAddressModeClamp;
    tex.filterMode = cudaFilterModePoint;
    tex.normalized = false;    // do not normalize coordinates

    // Bind the array to the texture
    cudaBindTextureToArray(tex, cu_array);

    // Run kernel
    dim3 blockDim(16, 16, 1);
    dim3 gridDim((width + blockDim.x - 1) / blockDim.x, (height + blockDim.y - 1) / blockDim.y, 1);
    kernel<<<gridDim, blockDim, 0>>>(d_data, height, width);

    // Unbind the array from the texture
    cudaUnbindTexture(tex);
}  // end foo()

__global__ void kernel(float* odata, int height, int width)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < width && y < height) {
        float c = tex2D(tex, x, y);
        odata[y * width + x] = c;
    }
}
```

Below is an example given in Python that computes the product of two arrays on the GPU.
The unofficial Python language bindings can be obtained from PyCUDA.[36]

```python
import pycuda.compiler as comp
import pycuda.driver as drv
import numpy
import pycuda.autoinit

mod = comp.SourceModule(
    """
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] * b[i];
}
"""
)

multiply_them = mod.get_function("multiply_them")

a = numpy.random.randn(400).astype(numpy.float32)
b = numpy.random.randn(400).astype(numpy.float32)
dest = numpy.zeros_like(a)
multiply_them(drv.Out(dest), drv.In(a), drv.In(b), block=(400, 1, 1))

print(dest - a * b)
```
Additional Python bindings to simplify matrix multiplication operations can be found in the program pycublas.[37]

```python
import numpy
from pycublas import CUBLASMatrix

A = CUBLASMatrix(numpy.mat([[1, 2, 3], [4, 5, 6]], numpy.float32))
B = CUBLASMatrix(numpy.mat([[2, 3], [4, 5], [6, 7]], numpy.float32))
C = A * B
print(C.np_mat())
```

while CuPy directly replaces NumPy:[38]

```python
import cupy

a = cupy.random.randn(400)
b = cupy.random.randn(400)
dest = cupy.zeros_like(a)

print(dest - a * b)
```
id="GPUs_supported">GPUs supported</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=7" title="Edit section: GPUs supported"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Supported CUDA compute capability versions for CUDA SDK version and microarchitecture (by code name): <style data-mw-deduplicate="TemplateStyles:r1281342579">@media screen{.mw-parser-output .sticky-header>thead>tr:first-child,.mw-parser-output .sticky-header>caption+tbody>tr:first-child,.mw-parser-output .sticky-header>tbody:first-child>tr:first-child,.mw-parser-output .sticky-header-multi>thead{position:sticky;top:0;z-index:10}.mw-parser-output .sticky-header:not(.wikitable),.mw-parser-output .sticky-header-multi:not(.wikitable){background-color:white}.mw-parser-output .sticky-header:not(.wikitable)>*,.mw-parser-output .sticky-header:not(.wikitable)>thead>tr:first-child,.mw-parser-output .sticky-header:not(.wikitable)>caption+tbody>tr:first-child,.mw-parser-output .sticky-header:not(.wikitable)>tbody:first-child>tr:first-child,.mw-parser-output .sticky-header-multi:not(.wikitable)>thead,.mw-parser-output .sticky-header-multi>thead{background-color:inherit}.mw-parser-output .sticky-header.wikitable,.mw-parser-output .sticky-header-multi.wikitable{border-collapse:separate;border-spacing:0;border-width:0 1px 1px 0}.mw-parser-output .sticky-header.wikitable td,.mw-parser-output .sticky-header.wikitable th,.mw-parser-output .sticky-header-multi.wikitable td,.mw-parser-output .sticky-header-multi.wikitable th{border-width:1px 0 0 1px}body.skin-timeless .mw-parser-output .sticky-header.wikitable,body.skin-timeless .mw-parser-output .sticky-header-multi.wikitable{border-bottom-width:0.2em;padding:0}.mw-parser-output .sticky-header.static-row-numbers.wikitable tr::before,.mw-parser-output .sticky-header-multi.static-row-numbers.wikitable tr::before{border-left-width:1px}.mw-parser-output .sticky-header.static-row-numbers.wikitable>thead>tr:first-child::before,.mw-parser-output .sticky-header.static-row-numbers.wikitable>caption+tbody>tr:first-child::before,.mw-parser-output .sticky-header.static-row-numbers.wikitable>tbody:first-child>tr:first-child::before,.mw-parser-output .sticky-header-multi.static-row-numbers.wikitable>thead>tr:first-child::before,.mw-parser-output .sticky-header-multi.static-row-numbers.wikitable>caption+tbody>tr:first-child::before,.mw-parser-output .sticky-header-multi.static-row-numbers.wikitable>tbody:first-child>tr:first-child::before,.mw-parser-output .sticky-header.static-row-numbers.wikitable .sortbottom::before,.mw-parser-output .sticky-header-multi.static-row-numbers.wikitable .sortbottom::before{border-top-width:1px}.mw-parser-output .sticky-header.static-row-numbers.wikitable .sortbottom~.sortbottom::before,.mw-parser-output .sticky-header-multi.static-row-numbers.wikitable .sortbottom~.sortbottom::before{border-top-width:0}.mw-parser-output .sticky-header.static-row-numbers.wikitable>tbody>tr:not(.static-row-header)::before,.mw-parser-output .sticky-header-multi.static-row-numbers.wikitable>tbody>tr:not(.static-row-header)::before{border-bottom-width:0!important;border-right-width:0!important}body.skin-timeless .mw-parser-output .content-table-scrollbar,body.skin-timeless .mw-parser-output .overflowed,body.skin-timeless .mw-parser-output .overflowed .content-table{overflow:visible}body.skin-timeless .mw-parser-output .scroll-right.overflowed 
.content-table-right{box-shadow:none;border-left:none}}@media screen and (min-width:1120px){body.vector-sticky-header-visible .mw-parser-output .sticky-header>thead>tr:first-child,body.vector-sticky-header-visible .mw-parser-output .sticky-header>caption+tbody>tr:first-child,body.vector-sticky-header-visible .mw-parser-output .sticky-header>tbody:first-child>tr:first-child,body.vector-sticky-header-visible .mw-parser-output .sticky-header-multi>thead{top:3.125rem}}@media screen and (min-width:851px){body.skin-timeless .mw-parser-output .sticky-header>thead>tr:first-child,body.skin-timeless .mw-parser-output .sticky-header>caption+tbody>tr:first-child,body.skin-timeless .mw-parser-output .sticky-header>tbody:first-child>tr:first-child,body.skin-timeless .mw-parser-output .sticky-header-multi>thead{top:3.51em}}@media screen and (max-width:639px){body.skin-monobook .mw-parser-output .sticky-header,body.skin-monobook .mw-parser-output .sticky-header-multi,body.skin-vector-legacy .mw-parser-output .sticky-header,body.skin-vector-legacy .mw-parser-output .sticky-header-multi,body.skin-vector-2022 .mw-parser-output .sticky-header,body.skin-vector-2022 .mw-parser-output .sticky-header-multi{display:table}}@media screen{html.skin-theme-clientpref-night body.skin-minerva .mw-parser-output .sticky-header-multi.wikitable{background-color:#101418}}@media screen and (prefers-color-scheme:dark){html.skin-theme-clientpref-os body.skin-minerva .mw-parser-output .sticky-header-multi.wikitable{background-color:#101418}}</style> </p> <table class="wikitable sticky-header"> <caption>Compute capability (CUDA SDK support vs. microarchitecture) </caption> <tbody><tr> <th>CUDA SDK<br />version(s)</th> <th><a href="/wiki/Tesla_(microarchitecture)" title="Tesla (microarchitecture)">Tesla</a></th> <th><a href="/wiki/Fermi_(microarchitecture)" title="Fermi (microarchitecture)">Fermi</a></th> <th><a href="/wiki/Kepler_(microarchitecture)" title="Kepler (microarchitecture)">Kepler</a><br />(early)</th> <th><a href="/wiki/Kepler_(microarchitecture)" title="Kepler (microarchitecture)">Kepler</a><br />(late)</th> <th><a href="/wiki/Maxwell_(microarchitecture)" title="Maxwell (microarchitecture)">Maxwell</a></th> <th><a href="/wiki/Pascal_(microarchitecture)" title="Pascal (microarchitecture)">Pascal</a></th> <th><a href="/wiki/Volta_(microarchitecture)" title="Volta (microarchitecture)">Volta</a></th> <th><a href="/wiki/Turing_(microarchitecture)" title="Turing (microarchitecture)">Turing</a></th> <th><a href="/wiki/Ampere_(microarchitecture)" title="Ampere (microarchitecture)">Ampere</a></th> <th><a href="/wiki/Ada_Lovelace_(microarchitecture)" title="Ada Lovelace (microarchitecture)">Ada<br />Lovelace</a></th> <th><a href="/wiki/Hopper_(microarchitecture)" title="Hopper (microarchitecture)">Hopper</a></th> <th><a href="/wiki/Blackwell_(microarchitecture)" title="Blackwell (microarchitecture)">Blackwell</a> </th></tr> <tr> <td>1.0<sup id="cite_ref-39" class="reference"><a href="#cite_note-39"><span class="cite-bracket">&#91;</span>39<span class="cite-bracket">&#93;</span></a></sup></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0 – 1.1</td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>1.1</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0 – 1.1+x</td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> 
<td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>2.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0 – 1.1+x</td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>2.1 – 2.3.1<sup id="cite_ref-40" class="reference"><a href="#cite_note-40"><span class="cite-bracket">&#91;</span>40<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-41" class="reference"><a href="#cite_note-41"><span class="cite-bracket">&#91;</span>41<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-42" class="reference"><a href="#cite_note-42"><span class="cite-bracket">&#91;</span>42<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-43" class="reference"><a href="#cite_note-43"><span class="cite-bracket">&#91;</span>43<span class="cite-bracket">&#93;</span></a></sup></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0 – 1.3</td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>3.0 – 3.1<sup id="cite_ref-44" class="reference"><a href="#cite_note-44"><span class="cite-bracket">&#91;</span>44<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-45" class="reference"><a href="#cite_note-45"><span class="cite-bracket">&#91;</span>45<span class="cite-bracket">&#93;</span></a></sup></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2.0</td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>3.2<sup id="cite_ref-46" class="reference"><a href="#cite_note-46"><span class="cite-bracket">&#91;</span>46<span class="cite-bracket">&#93;</span></a></sup></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2.1</td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>4.0 – 4.2</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2.1</td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>5.0 – 5.5</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3.5</td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>6.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td 
style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3.2</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3.5</td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>6.5</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.1</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3.7</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">5.x</td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>7.0 – 7.5</td> <td></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">5.x</td> <td></td> <td></td> <td></td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>8.0</td> <td></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">6.x</td> <td></td> <td></td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>9.0 – 9.2</td> <td></td> <td></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">7.0 – 7.2</td> <td></td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>10.0 – 10.2</td> <td></td> <td></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">7.5</td> <td></td> <td></td> <td></td> <td> </td></tr> <tr> <td>11.0<sup 
id="cite_ref-47" class="reference"><a href="#cite_note-47"><span class="cite-bracket">&#91;</span>47<span class="cite-bracket">&#93;</span></a></sup></td> <td></td> <td></td> <td></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3.5</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8.0</td> <td></td> <td></td> <td> </td></tr> <tr> <td>11.1 – 11.4<sup id="cite_ref-48" class="reference"><a href="#cite_note-48"><span class="cite-bracket">&#91;</span>48<span class="cite-bracket">&#93;</span></a></sup></td> <td></td> <td></td> <td></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3.5</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8.6</td> <td></td> <td></td> <td> </td></tr> <tr> <td>11.5 – 11.7.1<sup id="cite_ref-49" class="reference"><a href="#cite_note-49"><span class="cite-bracket">&#91;</span>49<span class="cite-bracket">&#93;</span></a></sup></td> <td></td> <td></td> <td></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3.5</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8.7</td> <td></td> <td></td> <td> </td></tr> <tr> <td>11.8<sup id="cite_ref-50" class="reference"><a href="#cite_note-50"><span class="cite-bracket">&#91;</span>50<span class="cite-bracket">&#93;</span></a></sup></td> <td></td> <td></td> <td></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3.5</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td 
style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8.9</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">9.0</td> <td> </td></tr> <tr> <td>12.0 – 12.6</td> <td></td> <td></td> <td></td> <td></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">5.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">9.0</td> <td> </td></tr> <tr> <td>12.8</td> <td></td> <td></td> <td></td> <td></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">5.0</td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"></td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">12.0 </td></tr></tbody></table> <p>Note: CUDA SDK 10.2 is the last official release for macOS, as support will not be available for macOS in newer releases. 
</p><p>CUDA compute capability by version with associated GPU semiconductors and GPU card models (separated by their various application areas): <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1281342579" /> </p> <table class="wikitable sticky-header" style="font-size: 85%; text-align: center; width: auto;"> <caption>Compute capability, GPU semiconductors and Nvidia GPU board products </caption> <tbody><tr> <th>Compute<br />capability<br />(version) </th> <th><a href="/wiki/Microarchitecture" title="Microarchitecture">Micro-<br />architecture</a> </th> <th>GPUs </th> <th><a href="/wiki/GeForce" title="GeForce">GeForce</a> </th> <th><a href="/wiki/Quadro" title="Quadro">Quadro</a>, <a href="/wiki/Quadro#For_business_NVS" title="Quadro">NVS</a> </th> <th><a href="/wiki/Nvidia_Tesla" title="Nvidia Tesla">Tesla/Datacenter</a> </th> <th><a href="/wiki/Tegra" title="Tegra">Tegra</a>,<br /><a href="/wiki/Nvidia_Jetson" title="Nvidia Jetson">Jetson</a>,<br /><a href="/wiki/Nvidia_Drive" title="Nvidia Drive">DRIVE</a> </th></tr> <tr> <td>1.0 </td> <td rowspan="4"><a href="/wiki/Tesla_(microarchitecture)" title="Tesla (microarchitecture)">Tesla</a> </td> <td>G80 </td> <td>GeForce 8800 Ultra, GeForce 8800 GTX, GeForce 8800 GTS(G80) </td> <td>Quadro FX 5600, Quadro FX 4600, Quadro Plex 2100 S4 </td> <td>Tesla C870, Tesla D870, Tesla S870 </td> <td> </td></tr> <tr> <td>1.1 </td> <td>G92, G94, G96, G98, G84, G86 </td> <td>GeForce GTS 250, GeForce 9800 GX2, GeForce 9800 GTX, GeForce 9800 GT, GeForce 8800 GTS(G92), GeForce 8800 GT, GeForce 9600 GT, GeForce 9500 GT, GeForce 9400 GT, GeForce 8600 GTS, GeForce 8600 GT, GeForce 8500 GT,<br />GeForce G110M, GeForce 9300M GS, GeForce 9200M GS, GeForce 9100M G, GeForce 8400M GT, GeForce G105M </td> <td>Quadro FX 4700 X2, Quadro FX 3700, Quadro FX 1800, Quadro FX 1700, Quadro FX 580, Quadro FX 570, Quadro FX 470, Quadro FX 380, Quadro FX 370, Quadro FX 370 Low Profile, Quadro NVS 450, Quadro NVS 420, Quadro NVS 290, Quadro NVS 295, Quadro Plex 2100 D4,<br />Quadro FX 3800M, Quadro FX 3700M, Quadro FX 3600M, Quadro FX 2800M, Quadro FX 2700M, Quadro FX 1700M, Quadro FX 1600M, Quadro FX 770M, Quadro FX 570M, Quadro FX 370M, Quadro FX 360M, Quadro NVS 320M, Quadro NVS 160M, Quadro NVS 150M, Quadro NVS 140M, Quadro NVS 135M, Quadro NVS 130M, Quadro NVS 450, Quadro NVS 420,<sup id="cite_ref-51" class="reference"><a href="#cite_note-51"><span class="cite-bracket">&#91;</span>51<span class="cite-bracket">&#93;</span></a></sup> Quadro NVS 295 </td> <td> </td> <td> </td></tr> <tr> <td>1.2 </td> <td>GT218, GT216, GT215 </td> <td>GeForce GT 340*, GeForce GT 330*, GeForce GT 320*, GeForce 315*, GeForce 310*, GeForce GT 240, GeForce GT 220, GeForce 210,<br />GeForce GTS 360M, GeForce GTS 350M, GeForce GT 335M, GeForce GT 330M, GeForce GT 325M, GeForce GT 240M, GeForce G210M, GeForce 310M, GeForce 305M </td> <td>Quadro FX 380 Low Profile, Quadro FX 1800M, Quadro FX 880M, Quadro FX 380M,<br />Nvidia NVS 300, NVS 5100M, NVS 3100M, NVS 2100M, ION </td> <td> </td> <td> </td></tr> <tr> <td>1.3 </td> <td>GT200, GT200b </td> <td>GeForce GTX 295, GTX 285, GTX 280, GeForce GTX 275, GeForce GTX 260 </td> <td>Quadro FX 5800, Quadro FX 4800, Quadro FX 4800 for Mac, Quadro FX 3800, Quadro CX, Quadro Plex 2200 D2 </td> <td>Tesla C1060, Tesla S1070, Tesla M1060 </td> <td> </td></tr> <tr> <td>2.0 </td> <td rowspan="2"><a href="/wiki/Fermi_(microarchitecture)" title="Fermi (microarchitecture)">Fermi</a> </td> <td>GF100, GF110 </td> <td>GeForce GTX 590, GeForce GTX 580, 
GeForce GTX 570, GeForce GTX 480, GeForce GTX 470, GeForce GTX 465,<br />GeForce GTX 480M </td> <td>Quadro 6000, Quadro 5000, Quadro 4000, Quadro 4000 for Mac, Quadro Plex 7000,<br />Quadro 5010M, Quadro 5000M </td> <td>Tesla C2075, Tesla C2050/C2070, Tesla M2050/M2070/M2075/M2090 </td> <td> </td></tr> <tr> <td>2.1 </td> <td>GF104, GF106 GF108, GF114, GF116, GF117, GF119 </td> <td>GeForce GTX 560 Ti, GeForce GTX 550 Ti, GeForce GTX 460, GeForce GTS 450, GeForce GTS 450*, GeForce GT 640 (GDDR3), GeForce GT 630, GeForce GT 620, GeForce GT 610, GeForce GT 520, GeForce GT 440, GeForce GT 440*, GeForce GT 430, GeForce GT 430*, GeForce GT 420*,<br />GeForce GTX 675M, GeForce GTX 670M, GeForce GT 635M, GeForce GT 630M, GeForce GT 625M, GeForce GT 720M, GeForce GT 620M, GeForce 710M, GeForce 610M, GeForce 820M, GeForce GTX 580M, GeForce GTX 570M, GeForce GTX 560M, GeForce GT 555M, GeForce GT 550M, GeForce GT 540M, GeForce GT 525M, GeForce GT 520MX, GeForce GT 520M, GeForce GTX 485M, GeForce GTX 470M, GeForce GTX 460M, GeForce GT 445M, GeForce GT 435M, GeForce GT 420M, GeForce GT 415M, GeForce 710M, GeForce 410M </td> <td>Quadro 2000, Quadro 2000D, Quadro 600,<br />Quadro 4000M, Quadro 3000M, Quadro 2000M, Quadro 1000M,<br />NVS 310, NVS 315, NVS 5400M, NVS 5200M, NVS 4200M </td> <td> </td> <td> </td></tr> <tr> <td>3.0 </td> <td rowspan="4"><a href="/wiki/Kepler_(microarchitecture)" title="Kepler (microarchitecture)">Kepler</a> </td> <td>GK104, GK106, GK107 </td> <td>GeForce GTX 770, GeForce GTX 760, GeForce GT 740, GeForce GTX 690, GeForce GTX 680, GeForce GTX 670, GeForce GTX 660 Ti, GeForce GTX 660, GeForce GTX 650 Ti BOOST, GeForce GTX 650 Ti, GeForce GTX 650,<br />GeForce GTX 880M, GeForce GTX 870M, GeForce GTX 780M, GeForce GTX 770M, GeForce GTX 765M, GeForce GTX 760M, GeForce GTX 680MX, GeForce GTX 680M, GeForce GTX 675MX, GeForce GTX 670MX, GeForce GTX 660M, GeForce GT 750M, GeForce GT 650M, GeForce GT 745M, GeForce GT 645M, GeForce GT 740M, GeForce GT 730M, GeForce GT 640M, GeForce GT 640M LE, GeForce GT 735M, GeForce GT 730M </td> <td>Quadro K5000, Quadro K4200, Quadro K4000, Quadro K2000, Quadro K2000D, Quadro K600, Quadro K420,<br />Quadro K500M, Quadro K510M, Quadro K610M, Quadro K1000M, Quadro K2000M, Quadro K1100M, Quadro K2100M, Quadro K3000M, Quadro K3100M, Quadro K4000M, Quadro K5000M, Quadro K4100M, Quadro K5100M,<br />NVS 510, Quadro 410 </td> <td>Tesla K10, GRID K340, GRID K520, GRID K2 </td> <td> </td></tr> <tr> <td>3.2 </td> <td>GK20A </td> <td> </td> <td> </td> <td> </td> <td>Tegra&#160;K1,<br />Jetson&#160;TK1 </td></tr> <tr> <td>3.5 </td> <td>GK110, GK208 </td> <td>GeForce GTX Titan Z, GeForce GTX Titan Black, GeForce GTX Titan, GeForce GTX 780 Ti, GeForce GTX 780, GeForce GT 640 (GDDR5), GeForce GT 630 v2, GeForce GT 730, GeForce GT 720, GeForce GT 710, GeForce GT 740M (64-bit, DDR3), GeForce GT 920M </td> <td>Quadro K6000, Quadro K5200 </td> <td>Tesla K40, Tesla K20x, Tesla K20 </td> <td> </td></tr> <tr> <td>3.7 </td> <td>GK210 </td> <td> </td> <td> </td> <td>Tesla K80 </td> <td> </td></tr> <tr> <td>5.0 </td> <td rowspan="3"><a href="/wiki/Maxwell_(microarchitecture)" title="Maxwell (microarchitecture)">Maxwell</a> </td> <td>GM107, GM108 </td> <td>GeForce GTX 750 Ti, GeForce GTX 750, GeForce GTX 960M, GeForce GTX 950M, GeForce 940M, GeForce 930M, GeForce GTX 860M, GeForce GTX 850M, GeForce 845M, GeForce 840M, GeForce 830M </td> <td>Quadro K1200, Quadro K2200, Quadro K620, Quadro M2000M, Quadro M1000M, Quadro M600M, Quadro K620M, NVS 810 </td> <td>Tesla M10 </td> <td> 
</td></tr> <tr> <td>5.2 </td> <td>GM200, GM204, GM206 </td> <td>GeForce GTX Titan X, GeForce GTX 980 Ti, GeForce GTX 980, GeForce GTX 970, GeForce GTX 960, GeForce GTX 950, GeForce GTX 750 SE,<br />GeForce GTX 980M, GeForce GTX 970M, GeForce GTX 965M </td> <td>Quadro M6000 24GB, Quadro M6000, Quadro M5000, Quadro M4000, Quadro M2000, Quadro M5500,<br />Quadro M5000M, Quadro M4000M, Quadro M3000M </td> <td>Tesla M4, Tesla M40, Tesla M6, Tesla M60 </td> <td> </td></tr> <tr> <td>5.3 </td> <td>GM20B </td> <td> </td> <td> </td> <td> </td> <td>Tegra&#160;X1,<br />Jetson&#160;TX1,<br />Jetson&#160;Nano,<br />DRIVE&#160;CX,<br />DRIVE&#160;PX </td></tr> <tr> <td>6.0 </td> <td rowspan="3"><a href="/wiki/Pascal_(microarchitecture)" title="Pascal (microarchitecture)">Pascal</a> </td> <td>GP100 </td> <td> </td> <td>Quadro GP100 </td> <td>Tesla P100 </td> <td> </td></tr> <tr> <td>6.1 </td> <td>GP102, GP104, GP106, GP107, GP108 </td> <td>Nvidia TITAN Xp, Titan X,<br />GeForce GTX 1080 Ti, GTX 1080, GTX 1070 Ti, GTX 1070, GTX 1060,<br /> GTX 1050 Ti, GTX 1050, GT 1030, GT 1010,<br /> MX350, MX330, MX250, MX230, MX150, MX130, MX110 </td> <td>Quadro P6000, Quadro P5000, Quadro P4000, Quadro P2200, Quadro P2000, Quadro P1000, Quadro P400, Quadro P500, Quadro P520, Quadro P600,<br />Quadro P5000 (mobile), Quadro P4000 (mobile), Quadro P3000 (mobile) </td> <td>Tesla P40, Tesla P6, Tesla P4 </td> <td> </td></tr> <tr> <td>6.2 </td> <td>GP10B<sup id="cite_ref-52" class="reference"><a href="#cite_note-52"><span class="cite-bracket">&#91;</span>52<span class="cite-bracket">&#93;</span></a></sup> </td> <td> </td> <td> </td> <td> </td> <td>Tegra&#160;X2, Jetson&#160;TX2, DRIVE&#160;PX&#160;2 </td></tr> <tr> <td>7.0 </td> <td rowspan="2"><a href="/wiki/Volta_(microarchitecture)" title="Volta (microarchitecture)">Volta</a> </td> <td>GV100 </td> <td>NVIDIA TITAN V </td> <td>Quadro GV100 </td> <td>Tesla V100, Tesla V100S </td> <td> </td></tr> <tr> <td>7.2 </td> <td>GV10B<sup id="cite_ref-53" class="reference"><a href="#cite_note-53"><span class="cite-bracket">&#91;</span>53<span class="cite-bracket">&#93;</span></a></sup><br /> <p>GV11B<sup id="cite_ref-54" class="reference"><a href="#cite_note-54"><span class="cite-bracket">&#91;</span>54<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-55" class="reference"><a href="#cite_note-55"><span class="cite-bracket">&#91;</span>55<span class="cite-bracket">&#93;</span></a></sup> </p> </td> <td> </td> <td> </td> <td> </td> <td>Tegra Xavier,<br />Jetson Xavier NX,<br />Jetson AGX Xavier,<br />DRIVE AGX Xavier,<br />DRIVE AGX Pegasus,<br />Clara AGX </td></tr> <tr> <td>7.5 </td> <td><a href="/wiki/Turing_(microarchitecture)" title="Turing (microarchitecture)">Turing</a> </td> <td>TU102, TU104, TU106, TU116, TU117 </td> <td>NVIDIA TITAN RTX,<br />GeForce RTX 2080 Ti, RTX 2080 Super, RTX 2080, RTX 2070 Super, RTX 2070, RTX 2060 Super, RTX 2060 12GB, RTX 2060,<br />GeForce GTX 1660 Ti, GTX 1660 Super, GTX 1660, GTX 1650 Super, GTX 1650, MX550, MX450 </td> <td>Quadro RTX 8000, Quadro RTX 6000, Quadro RTX 5000, Quadro RTX 4000, T1000, T600, T400<br />T1200 (mobile), T600 (mobile), T500 (mobile), Quadro T2000 (mobile), Quadro T1000 (mobile) </td> <td>Tesla T4 </td> <td> </td></tr> <tr> <td>8.0 </td> <td rowspan="3"><a href="/wiki/Ampere_(microarchitecture)" title="Ampere (microarchitecture)">Ampere</a> </td> <td>GA100 </td> <td> </td> <td> </td> <td>A100 80GB, A100 40GB, A30 </td> <td> </td></tr> <tr> <td>8.6 </td> <td>GA102, GA103, GA104, GA106, GA107 </td> 
<td>GeForce RTX 3090 Ti, RTX 3090, RTX 3080 Ti, RTX 3080 12GB, RTX 3080, RTX 3070 Ti, RTX 3070, RTX 3060 Ti, RTX 3060, RTX 3050, RTX 3050 Ti (mobile), RTX 3050 (mobile), RTX 2050 (mobile), MX570 </td> <td>RTX A6000, RTX A5500, RTX A5000, RTX A4500, RTX A4000, RTX A2000<br /> RTX A5000 (mobile), RTX A4000 (mobile), RTX A3000 (mobile), RTX A2000 (mobile) </td> <td>A40, A16, A10, A2 </td> <td> </td></tr> <tr> <td>8.7 </td> <td>GA10B </td> <td> </td> <td> </td> <td> </td> <td>Jetson Orin Nano,<br />Jetson Orin NX,<br />Jetson AGX Orin,<br />DRIVE AGX Orin,<br />DRIVE&#160;AGX&#160;Pegasus&#160;OA,<br />Clara Holoscan </td></tr> <tr> <td>8.9 </td> <td><a href="/wiki/Ada_Lovelace_(microarchitecture)" title="Ada Lovelace (microarchitecture)">Ada Lovelace</a><sup id="cite_ref-56" class="reference"><a href="#cite_note-56"><span class="cite-bracket">&#91;</span>56<span class="cite-bracket">&#93;</span></a></sup> </td> <td>AD102, AD103, AD104, AD106, AD107 </td> <td>GeForce RTX 4090, RTX 4080 Super, RTX 4080, RTX 4070 Ti Super, RTX 4070 Ti, RTX 4070 Super, RTX 4070, RTX 4060 Ti, RTX 4060, RTX 4050 (mobile) </td> <td>RTX 6000 Ada, RTX 5880 Ada, RTX 5000 Ada, RTX 4500 Ada, RTX 4000 Ada, RTX 4000 SFF Ada, RTX 2000 Ada, RTX 5000 Ada (mobile), RTX 4000 Ada (mobile), RTX 3500 Ada (mobile), RTX 2000 Ada (mobile) </td> <td>L40S, L40, L20, L4, L2 </td> <td> </td></tr> <tr> <td>9.0 </td> <td><a href="/wiki/Hopper_(microarchitecture)" title="Hopper (microarchitecture)">Hopper</a> </td> <td>GH100 </td> <td> </td> <td> </td> <td>H200, H100 </td> <td> </td></tr> <tr> <td>10.0 </td> <td rowspan="4"><a href="/wiki/Blackwell_(microarchitecture)" title="Blackwell (microarchitecture)">Blackwell</a> </td> <td>GB100 </td> <td> </td> <td> </td> <td>B200, GB200 </td> <td> </td></tr> <tr> <td>10.1 </td> <td>GB102 </td> <td> </td> <td> </td> <td>B100 </td> <td> </td></tr> <tr> <td>12.0 </td> <td>GB202, GB203, GB205, GB206, GB207 </td> <td>GeForce RTX 5090, RTX 5080, RTX 5070 Ti, RTX 5070 </td> <td>RTX PRO 6000 Blackwell, RTX PRO 5000 Blackwell, RTX PRO 4500 Blackwell, RTX PRO 4000 Blackwell </td> <td>B40 </td> <td> </td></tr> <tr> <td>12.x (?) </td> <td> </td> <td> </td> <td> </td> <td> </td> <td>Jetson Thor (?), AGX Thor (?), Drive Thor (?) 
</td></tr> <tr> <th>Compute<br />capability<br />(version) </th> <th><a href="/wiki/Microarchitecture" title="Microarchitecture">Micro-<br />architecture</a> </th> <th>GPUs </th> <th><a href="/wiki/GeForce" title="GeForce">GeForce</a> </th> <th><a href="/wiki/Quadro" title="Quadro">Quadro</a>, <a href="/wiki/Quadro#For_business_NVS" title="Quadro">NVS</a> </th> <th><a href="/wiki/Nvidia_Tesla" title="Nvidia Tesla">Tesla/Datacenter</a> </th> <th><a href="/wiki/Tegra" title="Tegra">Tegra</a>,<br /><a href="/wiki/Nvidia_Jetson" title="Nvidia Jetson">Jetson</a>,<br /><a href="/wiki/Nvidia_Drive" title="Nvidia Drive">DRIVE</a> </th></tr></tbody></table> <p>* – <a href="/wiki/Original_equipment_manufacturer" title="Original equipment manufacturer">OEM</a>-only products </p> <div class="mw-heading mw-heading2"><h2 id="Version_features_and_specifications">Version features and specifications</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=8" title="Edit section: Version features and specifications"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <table class="wikitable" style="font-size:85%;"> <tbody><tr> <th rowspan="2">Feature support (unlisted features are supported for all compute capabilities) </th> <th colspan="14">Compute capability (version) </th></tr> <tr> <th>1.0, 1.1</th> <th>1.2, 1.3</th> <th>2.x</th> <th>3.0</th> <th>3.2</th> <th>3.5, 3.7, 5.x, 6.x, 7.0, 7.2</th> <th>7.5</th> <th>8.x</th> <th>9.0, 10.x, 12.0 </th></tr> <tr> <td>Warp vote functions (__all(), __any()) </td> <td colspan="1" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">Yes </td></tr> <tr> <td>Warp vote functions (__ballot()) </td> <td colspan="2" rowspan="5" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="7" rowspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">Yes </td></tr> <tr> <td>Memory fence functions (__threadfence_system()) </td></tr> <tr> <td>Synchronization functions (__syncthreads_count(), __syncthreads_and(), __syncthreads_or()) </td></tr> <tr> <td>Surface functions </td></tr> <tr> <td>3D grid of thread blocks </td></tr> <tr> <td>Warp shuffle functions </td> <td colspan="3" rowspan="2" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="6" rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">Yes </td></tr> <tr> <td>Unified memory programming </td></tr> <tr> <td>Funnel shift </td> <td colspan="4" rowspan="1" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="5" rowspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">Yes </td></tr> <tr> <td>Dynamic parallelism </td> <td colspan="5" rowspan="1" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="4" rowspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">Yes </td></tr> <tr> <td>Uniform Datapath<sup id="cite_ref-57" class="reference"><a href="#cite_note-57"><span 
class="cite-bracket">&#91;</span>57<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="6" rowspan="1" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="3" rowspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">Yes </td></tr> <tr> <td>Hardware-accelerated async-copy </td> <td colspan="7" rowspan="4" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="2" rowspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">Yes </td></tr> <tr> <td>Hardware-accelerated <i>split arrive/wait barrier</i> </td></tr> <tr> <td>Warp-level support for reduction ops </td></tr> <tr> <td>L2 cache residency management </td></tr> <tr> <td>DPX instructions for accelerated dynamic programming </td> <td colspan="8" rowspan="4" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="1" rowspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">Yes </td></tr> <tr> <td>Distributed shared memory </td></tr> <tr> <td>Thread block cluster </td></tr> <tr> <td>Tensor memory accelerator (TMA) unit </td></tr> <tr> <th rowspan="2">Feature support (unlisted features are supported for all compute capabilities) </th> <th>1.0,1.1 </th> <th>1.2,1.3 </th> <th>2.x </th> <th>3.0 </th> <th>3.2 </th> <th>3.5, 3.7, 5.x, 6.x, 7.0, 7.2 </th> <th>7.5 </th> <th>8.x </th> <th>9.0, 10.x, 12.0 </th></tr> <tr> <th colspan="14">Compute capability (version) </th></tr></tbody></table><p><sup id="cite_ref-58" class="reference"><a href="#cite_note-58"><span class="cite-bracket">&#91;</span>58<span class="cite-bracket">&#93;</span></a></sup> </p><div class="mw-heading mw-heading3"><h3 id="Data_types">Data types</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=9" title="Edit section: Data types"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div class="mw-heading mw-heading4"><h4 id="Floating-point_types">Floating-point types</h4><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=10" title="Edit section: Floating-point types"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <table class="wikitable" style="font-size:85%;"> <tbody><tr> <th>Data type </th> <th>Supported vector types </th> <th>Storage Length Bits<br />(complete vector) </th> <th>Used Length Bits<br />(single value) </th> <th>Sign Bits </th> <th>Exponent Bits </th> <th>Mantissa Bits </th> <th>Comments </th></tr> <tr> <td>E2M1 = FP4 </td> <td>e2m1x2 / e2m1x4 </td> <td>8 / 16 </td> <td>4 </td> <td>1 </td> <td>2 </td> <td>1 </td> <td> </td></tr> <tr> <td>E2M3 = FP6 variant </td> <td>e2m3x2 / e2m3x4 </td> <td>16 / 32 </td> <td>6 </td> <td>1 </td> <td>2 </td> <td>3 </td> <td> </td></tr> <tr> <td>E3M2 = FP6 variant </td> <td>e3m2x2 / e3m2x4 </td> <td>16 / 32 </td> <td>6 </td> <td>1 </td> <td>3 </td> <td>2 </td> <td> </td></tr> <tr> <td>UE4M3 </td> <td>ue4m3 </td> <td>8 </td> <td>7 </td> <td>0 </td> <td>4 </td> <td>3 </td> <td>Used for scaling (E2M1 only) </td></tr> <tr> <td>E4M3 = FP8 variant </td> <td>e4m3 / e4m3x2 / e4m3x4 </td> <td>8 / 16 / 32 </td> <td>8 </td> <td>1 </td> <td>4 </td> <td>3 </td> 
<td> </td></tr> <tr> <td>E5M2 = FP8 variant </td> <td>e5m2 / e5m2x2 / e5m2x4 </td> <td>8 / 16 / 32 </td> <td>8 </td> <td>1 </td> <td>5 </td> <td>2 </td> <td>Exponent/range of FP16, fits into 8 bits </td></tr> <tr> <td>UE8M0 </td> <td>ue8m0x2 </td> <td>16 </td> <td>8 </td> <td>0 </td> <td>8 </td> <td>0 </td> <td>Used for scaling (any FP4 or FP6 or FP8 format) </td></tr> <tr> <td>FP16 </td> <td>f16 / f16x2 </td> <td>16 / 32 </td> <td>16 </td> <td>1 </td> <td>5 </td> <td>10 </td> <td> </td></tr> <tr> <td>BF16 </td> <td>bf16 / bf16x2 </td> <td>16 / 32 </td> <td>16 </td> <td>1 </td> <td>8 </td> <td>7 </td> <td>Exponent/range of FP32, fits into 16 bits </td></tr> <tr> <td>TF32 </td> <td>tf32 </td> <td>32 </td> <td>19 </td> <td>1 </td> <td>8 </td> <td>10 </td> <td>Exponent/range of FP32, mantissa/precision of FP16 </td></tr> <tr> <td>FP32 </td> <td>f32 / f32x2 </td> <td>32 / 64 </td> <td>32 </td> <td>1 </td> <td>8 </td> <td>23 </td> <td> </td></tr> <tr> <td>FP64 </td> <td>f64 </td> <td>64 </td> <td>64 </td> <td>1 </td> <td>11 </td> <td>52 </td> <td> </td></tr></tbody></table> <div class="mw-heading mw-heading4"><h4 id="Version_support">Version support</h4><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=11" title="Edit section: Version support"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <table class="wikitable" style="font-size:85%;"> <tbody><tr> <th>Data type </th> <th>Basic Operations </th> <th>Supported since<br /> </th> <th>Atomic Operations </th> <th>Supported since<br />for global memory </th> <th>Supported since<br />for shared memory </th></tr> <tr> <td>8-bit integer<br />signed/unsigned </td> <td>loading, storing, conversion </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0 </td> <td data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">— </td> <td colspan="2" data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">— </td></tr> <tr> <td>16-bit integer<br />signed/unsigned </td> <td>general operations </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0 </td> <td>atomicCAS() </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3.5 </td></tr> <tr> <td>32-bit integer<br />signed/unsigned </td> <td>general operations </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0 </td> <td>atomic functions </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.1 </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.2 </td></tr> <tr> <td>64-bit integer<br />signed/unsigned </td> <td>general operations </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0 </td> <td>atomic functions </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.2 </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2.0 </td></tr> <tr> <td>any 
128-bit trivially copyable type </td> <td>general operations </td> <td style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td>atomicExch, atomicCAS </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">9.0 </td></tr> <tr> <td rowspan="2">16-bit floating point<br />FP16 </td> <td rowspan="2">addition, subtraction,<br />multiplication, comparison,<br />warp shuffle functions, conversion </td> <td rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">5.3 </td> <td>half2 atomic addition </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">6.0 </td></tr> <tr> <td>atomic addition </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">7.0 </td></tr> <tr> <td>16-bit floating point<br />BF16 </td> <td>addition, subtraction,<br />multiplication, comparison,<br />warp shuffle functions, conversion </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8.0 </td> <td>atomic addition </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8.0 </td></tr> <tr> <td rowspan="2">32-bit floating point </td> <td rowspan="2">general operations </td> <td rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.0 </td> <td>atomicExch() </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.1 </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.2 </td></tr> <tr> <td>atomic addition </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2.0 </td></tr> <tr> <td rowspan="1">32-bit floating point float2 and float4 </td> <td>general operations </td> <td style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td>atomic addition </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">9.0 </td></tr> <tr> <td rowspan="1">64-bit floating point </td> <td>general operations </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1.3 </td> <td>atomic addition </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">6.0 </td></tr></tbody></table> <p>Note: Any missing lines or empty entries do reflect some lack of information on that exact item.<sup id="cite_ref-59" class="reference"><a href="#cite_note-59"><span class="cite-bracket">&#91;</span>59<span class="cite-bracket">&#93;</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="Tensor_cores">Tensor cores</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=12" title="Edit section: Tensor cores"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <table class="wikitable" style="font-size:85%;"> <tbody><tr> <th>FMA per cycle per tensor core<sup id="cite_ref-60" class="reference"><a href="#cite_note-60"><span class="cite-bracket">&#91;</span>60<span 
class="cite-bracket">&#93;</span></a></sup> </th> <th colspan="2">Supported since </th> <th>7.0 </th> <th>7.2 </th> <th>7.5 Workstation </th> <th>7.5 Desktop </th> <th>8.0 </th> <th>8.6 Workstation </th> <th>8.7 </th> <th>8.6 Desktop </th> <th>8.9 Desktop </th> <th>8.9 Workstation </th> <th>9.0 </th> <th>10.0 </th> <th>10.1 </th> <th>12.0 </th></tr> <tr> <th>Data Type </th> <th>For dense matrices </th> <th>For sparse matrices </th> <td data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">1st Gen (8x/SM) </td> <td data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">1st Gen? (8x/SM) </td> <td colspan="2" data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">2nd Gen (8x/SM) </td> <td colspan="4" data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">3rd Gen (4x/SM) </td> <td colspan="3" data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">4th Gen (4x/SM) </td> <td colspan="3" data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">5th Gen (4x/SM) </td></tr> <tr> <td>1-bit values (AND) </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8.0 as<br />experimental </td> <td rowspan="2" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="4" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="3" rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4096 </td> <td colspan="3" rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2048 </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">speed tbd </td></tr> <tr> <td>1-bit values (XOR) </td> <td rowspan="2" style="background: #FF8; color:black; vertical-align: middle; text-align: center;" class="table-maybe">7.5–8.9 as<br />experimental </td> <td rowspan="2" colspan="2" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1024 </td> <td rowspan="2" colspan="4" style="background: #FF8; color:black; vertical-align: middle; text-align: center;" class="table-maybe">Deprecated or removed? 
</td></tr> <tr> <td>4-bit integers </td> <td style="background: #FF8; color:black; vertical-align: middle; text-align: center;" class="table-maybe">8.0–8.9 as<br />experimental </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1024 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 </td></tr> <tr> <td>4-bit floating point FP4 (E2M1) </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">10.0 </td> <td colspan="11" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4096 </td> <td colspan="1" rowspan="10" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">tbd </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 </td></tr> <tr> <td>6-bit floating point FP6 (E3M2 and E2M3) </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">10.0 </td> <td colspan="11" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2048 </td> <td colspan="2" style="background: #FF8; color:black; vertical-align: middle; text-align: center;" class="table-maybe">tbd </td></tr> <tr> <td>8-bit integers </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">7.2 </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8.0 </td> <td style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 </td> <td rowspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1024 </td> <td rowspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2048 </td> <td rowspan="2" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 </td></tr> <tr> <td>8-bit floating point FP8 (E4M3 and E5M2) with FP16 accumulate </td> <td rowspan="2" colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8.9 </td> <td rowspan="2" colspan="8" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td rowspan="1" colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 </td></tr> <tr> 
<td>8-bit floating point FP8 (E4M3 and E5M2) with FP32 accumulate </td> <td rowspan="1" colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td rowspan="1" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td></tr> <tr> <td>16-bit floating point FP16 with FP16 accumulate </td> <td rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">7.0 </td> <td rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8.0 </td> <td rowspan="2" colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td rowspan="2" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td rowspan="3" colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td rowspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 </td> <td rowspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1024 </td> <td rowspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td></tr> <tr> <td>16-bit floating point FP16 with FP32 accumulate </td> <td rowspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="2" rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td colspan="1" rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td></tr> <tr> <td>16-bit floating point BF16 with FP32 accumulate </td> <td rowspan="2" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">7.5<sup id="cite_ref-61" class="reference"><a href="#cite_note-61"><span class="cite-bracket">&#91;</span>61<span class="cite-bracket">&#93;</span></a></sup> </td> <td rowspan="2" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8.0 </td> <td rowspan="3" colspan="2" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="1" style="background: #FF8; color:black; vertical-align: middle; text-align: center;" class="table-maybe">64<sup id="cite_ref-unofficial_support_in_SASS_62-0" class="reference"><a href="#cite_note-unofficial_support_in_SASS-62"><span class="cite-bracket">&#91;</span>62<span class="cite-bracket">&#93;</span></a></sup> </td></tr> <tr> <td>32-bit (19 bits used) floating point TF32 </td> <td colspan="1" style="background: #FF8; color:black; vertical-align: middle; text-align: center;" class="table-maybe">speed tbd (32?)<sup id="cite_ref-unofficial_support_in_SASS_62-1" class="reference"><a 
href="#cite_note-unofficial_support_in_SASS-62"><span class="cite-bracket">&#91;</span>62<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td></tr> <tr> <td>64-bit floating point </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8.0 </td> <td style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="2" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">speed tbd </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="2" style="background: #FF8; color:black; vertical-align: middle; text-align: center;" class="table-maybe">tbd </td></tr></tbody></table> <p>Note: Any missing lines or empty entries do reflect some lack of information on that exact item.<sup id="cite_ref-63" class="reference"><a href="#cite_note-63"><span class="cite-bracket">&#91;</span>63<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-64" class="reference"><a href="#cite_note-64"><span class="cite-bracket">&#91;</span>64<span class="cite-bracket">&#93;</span></a></sup> <sup id="cite_ref-65" class="reference"><a href="#cite_note-65"><span class="cite-bracket">&#91;</span>65<span class="cite-bracket">&#93;</span></a></sup> <sup id="cite_ref-66" class="reference"><a href="#cite_note-66"><span class="cite-bracket">&#91;</span>66<span class="cite-bracket">&#93;</span></a></sup> <sup id="cite_ref-67" class="reference"><a href="#cite_note-67"><span class="cite-bracket">&#91;</span>67<span class="cite-bracket">&#93;</span></a></sup> <sup id="cite_ref-68" class="reference"><a href="#cite_note-68"><span class="cite-bracket">&#91;</span>68<span class="cite-bracket">&#93;</span></a></sup> </p> <table class="wikitable" style="font-size:85%;"> <tbody><tr> <th>Tensor Core Composition </th> <th>7.0 </th> <th>7.2, 7.5 </th> <th>8.0, 8.6 </th> <th>8.7 </th> <th>8.9 </th> <th>9.0 </th></tr> <tr> <td>Dot Product Unit Width in FP16 units (in bytes)<sup id="cite_ref-69" class="reference"><a href="#cite_note-69"><span class="cite-bracket">&#91;</span>69<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-70" class="reference"><a href="#cite_note-70"><span class="cite-bracket">&#91;</span>70<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-71" class="reference"><a href="#cite_note-71"><span 
class="cite-bracket">&#91;</span>71<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-72" class="reference"><a href="#cite_note-72"><span class="cite-bracket">&#91;</span>72<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 (8) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 (16) </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 (8) </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 (32) </td></tr> <tr> <td>Dot Product Units per Tensor Core </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td></tr> <tr> <td>Tensor Cores per SM partition </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2 </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 </td></tr> <tr> <td>Full throughput (Bytes/cycle)<sup id="cite_ref-ReferenceC_73-0" class="reference"><a href="#cite_note-ReferenceC-73"><span class="cite-bracket">&#91;</span>73<span class="cite-bracket">&#93;</span></a></sup> per SM partition<sup id="cite_ref-product_first_3_table_rows_74-0" class="reference"><a href="#cite_note-product_first_3_table_rows-74"><span class="cite-bracket">&#91;</span>74<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 </td> <td> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1024 </td></tr> <tr> <td>FP Tensor Cores: Minimum cycles for warp-wide matrix calculation </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td> </td> <td> </td></tr> <tr> <td>FP Tensor Cores: Minimum Matrix Shape for full throughput (Bytes)<sup id="cite_ref-ReferenceD_75-0" class="reference"><a href="#cite_note-ReferenceD-75"><span class="cite-bracket">&#91;</span>75<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2048 </td> <td> </td> <td> </td></tr> <tr> <td>INT Tensor Cores: Minimum cycles for warp-wide matrix calculation </td> <td style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td> <td> </td> <td> </td></tr> <tr> <td>INT Tensor Cores: 
Minimum Matrix Shape for full throughput (Bytes) </td> <td style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1024 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2048 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1024 </td> <td> </td> <td> </td></tr></tbody></table> <p><sup id="cite_ref-76" class="reference"><a href="#cite_note-76"><span class="cite-bracket">&#91;</span>76<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-77" class="reference"><a href="#cite_note-77"><span class="cite-bracket">&#91;</span>77<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-78" class="reference"><a href="#cite_note-78"><span class="cite-bracket">&#91;</span>78<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-79" class="reference"><a href="#cite_note-79"><span class="cite-bracket">&#91;</span>79<span class="cite-bracket">&#93;</span></a></sup> </p> <table class="wikitable" style="font-size:85%;"> <tbody><tr> <th>FP64 Tensor Core Composition </th> <th>8.0 </th> <th>8.6 </th> <th>8.7 </th> <th>8.9 </th> <th>9.0 </th></tr> <tr> <td>Dot Product Unit Width in FP64 units (in bytes) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 (32) </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">tbd </td> <td> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 (32) </td></tr> <tr> <td>Dot Product Units per Tensor Core </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">tbd </td> <td> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td></tr> <tr> <td>Tensor Cores per SM partition </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 </td></tr> <tr> <td>Full throughput (Bytes/cycle)<sup id="cite_ref-ReferenceC_73-1" class="reference"><a href="#cite_note-ReferenceC-73"><span class="cite-bracket">&#91;</span>73<span class="cite-bracket">&#93;</span></a></sup> per SM partition<sup id="cite_ref-product_first_3_table_rows_74-1" class="reference"><a href="#cite_note-product_first_3_table_rows-74"><span class="cite-bracket">&#91;</span>74<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">tbd </td> <td> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 </td></tr> <tr> <td>Minimum cycles for warp-wide matrix calculation </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="2" 
style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">tbd </td> <td> </td> <td> </td></tr> <tr> <td>Minimum Matrix Shape for full throughput (Bytes)<sup id="cite_ref-ReferenceD_75-1" class="reference"><a href="#cite_note-ReferenceD-75"><span class="cite-bracket">&#91;</span>75<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2048 </td> <td> </td> <td> </td></tr></tbody></table> <div class="mw-heading mw-heading3"><h3 id="Technical_specification">Technical specification</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=13" title="Edit section: Technical specification"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div style="overflow-x:auto"> <table class="wikitable" style="font-size:85%;"> <tbody><tr> <th rowspan="2">Technical specifications </th> <th colspan="25">Compute capability (version) </th></tr> <tr> <th>1.0 </th> <th>1.1 </th> <th>1.2 </th> <th>1.3 </th> <th>2.x </th> <th>3.0 </th> <th>3.2 </th> <th>3.5 </th> <th>3.7 </th> <th>5.0 </th> <th>5.2 </th> <th>5.3 </th> <th>6.0 </th> <th>6.1 </th> <th>6.2 </th> <th>7.0 </th> <th>7.2 </th> <th>7.5 </th> <th>8.0 </th> <th>8.6 </th> <th>8.7 </th> <th>8.9 </th> <th>9.0 </th> <th>10.x </th> <th>12.0 </th></tr> <tr> <td>Maximum number of resident grids per device<br />(concurrent kernel execution, can be lower for specific devices) </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td></tr> <tr> <td>Maximum dimensionality of grid of thread blocks </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2 </td> <td colspan="21" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3 </td></tr> <tr> <td>Maximum x-dimension of a grid of thread blocks </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">65535 </td> <td colspan="20" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" 
class="table-yes">2<sup>31</sup> − 1 </td></tr> <tr> <td>Maximum y-, or z-dimension of a grid of thread blocks </td> <td colspan="25" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">65535 </td></tr> <tr> <td>Maximum dimensionality of thread block </td> <td colspan="25" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3 </td></tr> <tr> <td>Maximum x- or y-dimension of a block </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 </td> <td colspan="21" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1024 </td></tr> <tr> <td>Maximum z-dimension of a block </td> <td colspan="25" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td></tr> <tr> <td>Maximum number of threads per block </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 </td> <td colspan="21" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1024 </td></tr> <tr> <td>Warp size </td> <td colspan="25" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td></tr> <tr> <td>Maximum number of resident blocks per multiprocessor </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">24 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td></tr> <tr> <td>Maximum number of resident warps per multiprocessor </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">24 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">48 </td> <td colspan="12" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">48 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td colspan="1" 
style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">48 </td></tr> <tr> <td>Maximum number of resident threads per multiprocessor </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">768 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1024 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1536 </td> <td colspan="12" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2048 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1024 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2048 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1536 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2048 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1536 </td></tr> <tr> <td>Number of 32-bit regular registers per multiprocessor </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 K </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 K </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 K </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 K </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 K </td> <td colspan="16" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 K </td></tr> <tr> <td>Number of 32-bit uniform registers per multiprocessor </td> <td colspan="17" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2 K<sup id="cite_ref-ReferenceE_80-0" class="reference"><a href="#cite_note-ReferenceE-80"><span class="cite-bracket">&#91;</span>80<span class="cite-bracket">&#93;</span></a></sup> <p><sup id="cite_ref-81" class="reference"><a href="#cite_note-81"><span class="cite-bracket">&#91;</span>81<span class="cite-bracket">&#93;</span></a></sup> </p> </td> <td colspan="7" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"> </td></tr> <tr> <td>Maximum number of 32-bit registers per thread block </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 K </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 K </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 K </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 K </td> <td colspan="1" 
style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 K </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 K </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 K </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 K </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 K </td> <td colspan="10" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 K </td></tr> <tr> <td>Maximum number of 32-bit regular registers per thread </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">124 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">63 </td> <td colspan="19" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">255 </td></tr> <tr> <td>Maximum number of 32-bit uniform registers per warp </td> <td colspan="17" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">63<sup id="cite_ref-ReferenceE_80-1" class="reference"><a href="#cite_note-ReferenceE-80"><span class="cite-bracket">&#91;</span>80<span class="cite-bracket">&#93;</span></a></sup> <p><sup id="cite_ref-82" class="reference"><a href="#cite_note-82"><span class="cite-bracket">&#91;</span>82<span class="cite-bracket">&#93;</span></a></sup> </p> </td> <td colspan="7" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"> </td></tr> <tr> <td>Amount of shared memory per multiprocessor<br />(out of overall shared memory + L1 cache, where applicable) </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 / 48 KiB (of 64 KiB) </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 / 32 / 48 KiB (of 64 KiB) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">80 / 96 / 112 KiB (of 128 KiB) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">96 KiB </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">96 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 KiB </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">0 / 8 / 16 / 32 / 64 / 96 KiB (of 128 KiB) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" 
class="table-yes">32 / 64 KiB (of 96 KiB) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">0 / 8 / 16 / 32 / 64 / 100 / 132 / 164 KiB (of 192 KiB) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">0 / 8 / 16 / 32 / 64 / 100 KiB (of 128 KiB) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">0 / 8 / 16 / 32 / 64 / 100 / 132 / 164 KiB (of 192 KiB) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">0 / 8 / 16 / 32 / 64 / 100 KiB (of 128 KiB) </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">0 / 8 / 16 / 32 / 64 / 100 / 132 / 164 / 196 / 228 KiB (of 256 KiB) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">0 / 8 / 16 / 32 / 64 / 100 KiB (of 128 KiB) </td></tr> <tr> <td>Maximum amount of shared memory per thread block </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 KiB </td> <td colspan="11" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">48 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">96 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">48 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">163 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">99 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">163 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">99 KiB </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">227 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">99 KiB </td></tr> <tr> <td>Number of shared memory banks </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="21" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td></tr> <tr> <td>Amount of local memory per thread </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 KiB </td> <td colspan="21" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 KiB </td></tr> <tr> <td>Constant memory size accessible by CUDA C/C++<br />(1 bank, PTX can access 11 banks, SASS can access 18 banks) </td> <td colspan="25" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 KiB </td></tr> <tr> <td>Cache working set per multiprocessor for constant memory </td> <td colspan="12" 
style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 KiB </td> <td colspan="12" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 KiB </td></tr> <tr> <td>Cache working set per multiprocessor for texture memory </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16&#160;KiB per TPC </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">24&#160;KiB per TPC </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">12&#160;KiB </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">12&#160;–&#160;48&#160;KiB<sup id="cite_ref-83" class="reference"><a href="#cite_note-83"><span class="cite-bracket">&#91;</span>83<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">24&#160;KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">48&#160;KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32&#160;KiB<sup id="cite_ref-Tegra_X1_84-0" class="reference"><a href="#cite_note-Tegra_X1-84"><span class="cite-bracket">&#91;</span>84<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">24&#160;KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">48&#160;KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">24&#160;KiB </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32&#160;–&#160;128&#160;KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32&#160;–&#160;64&#160;KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">28&#160;–&#160;192&#160;KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">28&#160;–&#160;128&#160;KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">28&#160;–&#160;192&#160;KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">28&#160;–&#160;128&#160;KiB </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">28&#160;–&#160;256&#160;KiB </td></tr> <tr> <td>Maximum width for 1D texture reference bound to a CUDA <br />array </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8192 </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">65536 </td> <td colspan="13" 
style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">131072 </td></tr> <tr> <td>Maximum width for 1D texture reference bound to linear <br />memory </td> <td colspan="12" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2<sup>27</sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2<sup>28</sup> </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2<sup>27</sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2<sup>28</sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2<sup>27</sup> </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2<sup>28</sup> </td></tr> <tr> <td>Maximum width and number of layers for a 1D layered <br />texture reference </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8192 × 512 </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384 × 2048 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32768 x 2048 </td></tr> <tr> <td>Maximum width and height for 2D texture reference bound <br />to a CUDA array </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">65536 × 32768 </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">65536 × 65535 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">131072 x 65536 </td></tr> <tr> <td>Maximum width and height for 2D texture reference bound <br />to a linear memory </td> <td colspan="9" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">65000 x 65000 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">65536 x 65536 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">131072 x 65000 </td></tr> <tr> <td>Maximum width and height for 2D texture reference bound <br />to a CUDA array supporting texture gather </td> <td colspan="4" data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">— </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384 x 16384 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32768 x 32768 </td></tr> <tr> <td>Maximum width, height, and number of layers for a 2D <br />layered texture reference </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8192 × 8192 × 512 </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384 × 16384 × 2048 </td> <td colspan="13" 
style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32768 x 32768 x 2048 </td></tr> <tr> <td>Maximum width, height and depth for a 3D texture <br />reference bound to linear memory or a CUDA array </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2048<sup>3</sup> </td> <td colspan="7" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4096<sup>3</sup> </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384<sup>3</sup> </td></tr> <tr> <td>Maximum width (and height) for a cubemap texture reference </td> <td colspan="4" data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">— </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32768 </td></tr> <tr> <td>Maximum width (and height) and number of layers <br />for a cubemap layered texture reference </td> <td colspan="4" data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">— </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384 × 2046 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32768 × 2046 </td></tr> <tr> <td>Maximum number of textures that can be bound to a <br />kernel </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td colspan="20" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 </td></tr> <tr> <td>Maximum width for a 1D surface reference bound to a <br />CUDA array </td> <td colspan="4" rowspan="8" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">Not<br />supported </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">65536 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32768 </td></tr> <tr> <td>Maximum width and number of layers for a 1D layered <br />surface reference </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">65536 × 2048 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384 × 2048 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32768 × 2048 </td></tr> <tr> <td>Maximum width and height for a 2D surface reference <br />bound to a CUDA array </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">65536 × 32768 </td> <td colspan="3" 
style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384 × 65536 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">131072 × 65536 </td></tr> <tr> <td>Maximum width, height, and number of layers for a 2D <br />layered surface reference </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">65536 × 32768 × 2048 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384 × 16384 × 2048 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32768 × 32768 × 2048 </td></tr> <tr> <td>Maximum width, height, and depth for a 3D surface <br />reference bound to a CUDA array </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">65536 × 32768 × 2048 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4096 × 4096 × 4096 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384 × 16384 × 16384 </td></tr> <tr> <td>Maximum width (and height) for a cubemap surface reference bound to a CUDA array </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32768 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32768 </td></tr> <tr> <td>Maximum width and number of layers for a cubemap <br />layered surface reference </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32768 × 2046 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16384 × 2046 </td> <td colspan="13" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32768 × 2046 </td></tr> <tr> <td>Maximum number of surfaces that can be bound to a <br />kernel </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td colspan="10" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="10" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td></tr> <tr> <td>Maximum number of instructions per kernel </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2 million </td> <td colspan="21" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 million </td></tr> <tr> <td>Maximum number of Thread Blocks per Thread Block Cluster<sup id="cite_ref-85" class="reference"><a href="#cite_note-85"><span class="cite-bracket">&#91;</span>85<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="22" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td 
colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td></tr> <tr> <th rowspan="2">Technical specifications </th> <th>1.0 </th> <th>1.1 </th> <th>1.2 </th> <th>1.3 </th> <th>2.x </th> <th>3.0 </th> <th>3.2 </th> <th>3.5 </th> <th>3.7 </th> <th>5.0 </th> <th>5.2 </th> <th>5.3 </th> <th>6.0 </th> <th>6.1 </th> <th>6.2 </th> <th>7.0 </th> <th>7.2 </th> <th>7.5 </th> <th>8.0 </th> <th>8.6 </th> <th>8.7 </th> <th>8.9 </th> <th>9.0 </th> <th>10.x </th> <th>12.0 </th></tr> <tr> <th colspan="25">Compute capability (version) </th></tr></tbody></table><sup id="cite_ref-86" class="reference"><a href="#cite_note-86"><span class="cite-bracket">&#91;</span>86<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-87" class="reference"><a href="#cite_note-87"><span class="cite-bracket">&#91;</span>87<span class="cite-bracket">&#93;</span></a></sup> </div> <div class="mw-heading mw-heading3"><h3 id="Multiprocessor_architecture">Multiprocessor architecture</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=14" title="Edit section: Multiprocessor architecture"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <div style="overflow-x:auto"> <table class="wikitable" style="font-size:85%;"> <tbody><tr> <th rowspan="2">Architecture specifications </th> <th colspan="26">Compute capability (version) </th></tr> <tr> <th>1.0 </th> <th>1.1 </th> <th>1.2 </th> <th>1.3 </th> <th>2.0 </th> <th>2.1 </th> <th>3.0 </th> <th>3.2 </th> <th>3.5 </th> <th>3.7 </th> <th>5.0 </th> <th>5.2 </th> <th>5.3 </th> <th>6.0 </th> <th>6.1 </th> <th>6.2 </th> <th>7.0 </th> <th>7.2 </th> <th>7.5 </th> <th>8.0 </th> <th>8.6 </th> <th>8.7 </th> <th>8.9 </th> <th>9.0 </th> <th>10.x </th> <th>12.0 </th></tr> <tr> <td>Number of ALU lanes for INT32 arithmetic operations </td> <td rowspan="3" colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td rowspan="3" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td rowspan="3" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">48 </td> <td rowspan="3" colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">192<sup id="cite_ref-88" class="reference"><a href="#cite_note-88"><span class="cite-bracket">&#91;</span>88<span class="cite-bracket">&#93;</span></a></sup> </td> <td rowspan="3" colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td rowspan="4" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td rowspan="4" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td rowspan="3" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td rowspan="4" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td rowspan="2" colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" 
class="table-yes">64 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td rowspan="4" colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td></tr> <tr> <td>Number of ALU lanes for any INT32 or FP32 arithmetic operation </td> <td colspan="4" data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">— </td> <td colspan="2" data-sort-value="" style="background: var(--background-color-interactive, #ececec); color: var(--color-base, inherit); vertical-align: middle; text-align: center;" class="table-na">— </td></tr> <tr> <td>Number of ALU lanes for FP32 arithmetic operations </td> <td rowspan="2" colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td rowspan="1" colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td rowspan="1" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td> <td rowspan="2" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 </td></tr> <tr> <td>Number of ALU lanes for FP16x2 arithmetic operations </td> <td colspan="12" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 </td> <td rowspan="1" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128<sup id="cite_ref-89" class="reference"><a href="#cite_note-89"><span class="cite-bracket">&#91;</span>89<span class="cite-bracket">&#93;</span></a></sup> </td> <td rowspan="1" colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128<sup id="cite_ref-90" class="reference"><a href="#cite_note-90"><span class="cite-bracket">&#91;</span>90<span class="cite-bracket">&#93;</span></a></sup> </td> <td rowspan="1" colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64<sup id="cite_ref-91" class="reference"><a href="#cite_note-91"><span class="cite-bracket">&#91;</span>91<span class="cite-bracket">&#93;</span></a></sup> </td></tr> <tr> <td>Number of ALU lanes for FP64 arithmetic operations </td> <td colspan="3" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 by FP32<sup id="cite_ref-92" class="reference"><a href="#cite_note-92"><span class="cite-bracket">&#91;</span>92<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 by FP32<sup id="cite_ref-93" class="reference"><a href="#cite_note-93"><span class="cite-bracket">&#91;</span>93<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" 
class="table-yes">8 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 / 64<sup id="cite_ref-depending_on_model_94-0" class="reference"><a href="#cite_note-depending_on_model-94"><span class="cite-bracket">&#91;</span>94<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4<sup id="cite_ref-95" class="reference"><a href="#cite_note-95"><span class="cite-bracket">&#91;</span>95<span class="cite-bracket">&#93;</span></a></sup> </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2 </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2 </td></tr> <tr> <td>Number of Load/Store Units </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 per 2 SM </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 per 2 SM </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 per 2 SM / 3 SM<sup id="cite_ref-depending_on_model_94-1" class="reference"><a href="#cite_note-depending_on_model-94"><span class="cite-bracket">&#91;</span>94<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 per 3 SM </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="7" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td></tr> <tr> <td>Number of special function units for single-precision floating-point transcendental functions </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2<sup id="cite_ref-96" class="reference"><a href="#cite_note-96"><span class="cite-bracket">&#91;</span>96<span 
class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td colspan="6" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td></tr> <tr> <td>Number of texture mapping units (TMU) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 per 2 SM </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 per 2 SM </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 per 2 / 3SM<sup id="cite_ref-depending_on_model_94-2" class="reference"><a href="#cite_note-depending_on_model-94"><span class="cite-bracket">&#91;</span>94<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 per 3 SM </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 / 8<sup id="cite_ref-depending_on_model_94-3" class="reference"><a href="#cite_note-depending_on_model-94"><span class="cite-bracket">&#91;</span>94<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="6" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td></tr> <tr> <td>Number of ALU lanes for uniform INT32 arithmetic operations </td> <td colspan="18" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2<sup id="cite_ref-97" class="reference"><a href="#cite_note-97"><span class="cite-bracket">&#91;</span>97<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes"> </td></tr> <tr> <td>Number of tensor cores </td> <td colspan="16" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 (1st gen.)<sup id="cite_ref-inside-volta_98-0" 
class="reference"><a href="#cite_note-inside-volta-98"><span class="cite-bracket">&#91;</span>98<span class="cite-bracket">&#93;</span></a></sup> </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">0 / 8<sup id="cite_ref-depending_on_model_94-4" class="reference"><a href="#cite_note-depending_on_model-94"><span class="cite-bracket">&#91;</span>94<span class="cite-bracket">&#93;</span></a></sup> (2nd gen.) </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 (3rd gen.) </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 (4th gen.) </td></tr> <tr> <td>Number of raytracing cores </td> <td colspan="18" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">0 / 1<sup id="cite_ref-depending_on_model_94-5" class="reference"><a href="#cite_note-depending_on_model-94"><span class="cite-bracket">&#91;</span>94<span class="cite-bracket">&#93;</span></a></sup> (1st gen.) </td> <td style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 (2nd gen.) </td> <td colspan="1" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 (3rd gen.) </td> <td colspan="1" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td></tr> <tr> <td>Number of SM Partitions = Processing Blocks<sup id="cite_ref-99" class="reference"><a href="#cite_note-99"><span class="cite-bracket">&#91;</span>99<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="10" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2 </td> <td colspan="10" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td></tr> <tr> <td>Number of warp schedulers per SM partition </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2 </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td> <td colspan="14" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 </td></tr> <tr> <td>Max number of new instructions issued each cycle by a single scheduler<sup id="cite_ref-100" class="reference"><a href="#cite_note-100"><span class="cite-bracket">&#91;</span>100<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2<sup id="cite_ref-101" class="reference"><a href="#cite_note-101"><span 
class="cite-bracket">&#91;</span>101<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2<sup id="cite_ref-102" class="reference"><a href="#cite_note-102"><span class="cite-bracket">&#91;</span>102<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="10" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2 </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 </td></tr> <tr> <td>Size of unified memory for data cache and shared memory </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 KiB<sup id="cite_ref-shared_memory_only,_no_data_cache_103-0" class="reference"><a href="#cite_note-shared_memory_only,_no_data_cache-103"><span class="cite-bracket">&#91;</span>103<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 KiB<sup id="cite_ref-shared_memory_only,_no_data_cache_103-1" class="reference"><a href="#cite_note-shared_memory_only,_no_data_cache-103"><span class="cite-bracket">&#91;</span>103<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="5" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 KiB SM + 24 KiB L1 (separate)<sup id="cite_ref-ReferenceA_104-0" class="reference"><a href="#cite_note-ReferenceA-104"><span class="cite-bracket">&#91;</span>104<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">96 KiB SM + 24 KiB L1 (separate)<sup id="cite_ref-ReferenceA_104-1" class="reference"><a href="#cite_note-ReferenceA-104"><span class="cite-bracket">&#91;</span>104<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 KiB SM + 24 KiB L1 (separate)<sup id="cite_ref-ReferenceA_104-2" class="reference"><a href="#cite_note-ReferenceA-104"><span class="cite-bracket">&#91;</span>104<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 KiB SM + 24 KiB L1 (separate)<sup id="cite_ref-ReferenceA_104-3" class="reference"><a href="#cite_note-ReferenceA-104"><span class="cite-bracket">&#91;</span>104<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">96 KiB SM + 24 KiB L1 (separate)<sup id="cite_ref-ReferenceA_104-4" class="reference"><a href="#cite_note-ReferenceA-104"><span class="cite-bracket">&#91;</span>104<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" 
class="table-yes">64 KiB SM + 24 KiB L1 (separate)<sup id="cite_ref-ReferenceA_104-5" class="reference"><a href="#cite_note-ReferenceA-104"><span class="cite-bracket">&#91;</span>104<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 KiB </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">96 KiB<sup id="cite_ref-105" class="reference"><a href="#cite_note-105"><span class="cite-bracket">&#91;</span>105<span class="cite-bracket">&#93;</span></a></sup> </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">192 KiB </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 KiB </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">192 KiB </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 KiB </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 KiB </td></tr> <tr> <td>Size of L3 instruction cache per GPU </td> <td colspan="3"> </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 KiB<sup id="cite_ref-106" class="reference"><a href="#cite_note-106"><span class="cite-bracket">&#91;</span>106<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="2"> </td> <td colspan="18" rowspan="2" style="background: #FF8; color:black; vertical-align: middle; text-align: center;" class="table-maybe">use L2 Data Cache </td></tr> <tr> <td>Size of L2 instruction cache per Texture Processor Cluster (TPC) </td> <td colspan="3"> </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 KiB </td> <td colspan="2"> </td></tr> <tr> <td>Size of L1.5 instruction cache per SM<sup id="cite_ref-ReferenceF_107-0" class="reference"><a href="#cite_note-ReferenceF-107"><span class="cite-bracket">&#91;</span>107<span class="cite-bracket">&#93;</span></a></sup> </td> <td rowspan="2" colspan="3"> </td> <td rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 KiB </td> <td colspan="2"> </td> <td colspan="3"> </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 KiB </td> <td> </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 KiB </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">48 KiB<sup id="cite_ref-Tegra_X1_84-1" class="reference"><a href="#cite_note-Tegra_X1-84"><span class="cite-bracket">&#91;</span>84<span class="cite-bracket">&#93;</span></a></sup> </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 KiB </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 KiB </td> <td> </td> <td rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 KiB </td> <td rowspan="2"> </td> <td rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">~46 KiB<sup id="cite_ref-108" class="reference"><a 
href="#cite_note-108"><span class="cite-bracket">&#91;</span>108<span class="cite-bracket">&#93;</span></a></sup> </td> <td rowspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 KiB<sup id="cite_ref-109" class="reference"><a href="#cite_note-109"><span class="cite-bracket">&#91;</span>109<span class="cite-bracket">&#93;</span></a></sup> </td> <td rowspan="2" colspan="4"> </td></tr> <tr> <td>Size of L1 instruction cache per SM </td> <td colspan="2"> </td> <td colspan="3"> </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 KiB </td> <td> </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 KiB </td> <td> </td></tr> <tr> <td>Size of L0 instruction cache per SM partition </td> <td colspan="10" style="background: #FF8; color:black; vertical-align: middle; text-align: center;" class="table-maybe">only 1 partition per SM </td> <td colspan="6" style="background:#FFC7C7;color:black;vertical-align:middle;text-align:center;" class="table-no">No </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">12 KiB </td> <td> </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 KiB?<sup id="cite_ref-110" class="reference"><a href="#cite_note-110"><span class="cite-bracket">&#91;</span>110<span class="cite-bracket">&#93;</span></a></sup> </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 KiB </td> <td colspan="4"> </td></tr> <tr> <td>Instruction Width<sup id="cite_ref-ReferenceF_107-1" class="reference"><a href="#cite_note-ReferenceF-107"><span class="cite-bracket">&#91;</span>107<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="6" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 bits instructions and 64 bits instructions<sup id="cite_ref-111" class="reference"><a href="#cite_note-111"><span class="cite-bracket">&#91;</span>111<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 bits instructions + 64 bits control logic every 7 instructions </td> <td colspan="6" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 bits instructions + 64 bits control logic every 3 instructions </td> <td colspan="8" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 bits combined instruction and control logic </td></tr> <tr> <td>Memory Bus Width per Memory Partition in bits </td> <td colspan="12" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 ((G)DDR) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 ((G)DDR) </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 (HBM) </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 ((G)DDR) </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 (HBM) </td> <td colspan="2" 
style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 ((G)DDR) </td> <td style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 (HBM) </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 ((G)DDR) </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 (HBM) </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 ((G)DDR) </td></tr> <tr> <td>L2 Cache per Memory Partition </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 KiB<sup id="cite_ref-ReferenceB_112-0" class="reference"><a href="#cite_note-ReferenceB-112"><span class="cite-bracket">&#91;</span>112<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">32 KiB<sup id="cite_ref-ReferenceB_112-1" class="reference"><a href="#cite_note-ReferenceB-112"><span class="cite-bracket">&#91;</span>112<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 KiB </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">1 MiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">256 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">128 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">768 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">64 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 MiB </td> <td colspan="2" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">512 KiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 MiB<sup id="cite_ref-113" class="reference"><a href="#cite_note-113"><span class="cite-bracket">&#91;</span>113<span class="cite-bracket">&#93;</span></a></sup> </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">5 MiB </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">6.25 MiB </td> <td colspan="1" 
style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 MiB<sup id="cite_ref-114" class="reference"><a href="#cite_note-114"><span class="cite-bracket">&#91;</span>114<span class="cite-bracket">&#93;</span></a></sup> </td></tr> <tr> <td>Number of Render Output Units (ROP) per memory partition (or per GPC in later models) </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td> <td colspan="3" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">12 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">4 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">2 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">8 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 per GPC </td> <td colspan="1" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">3 per GPC </td> <td colspan="4" style="background:#9EFF9E;color:black;vertical-align:middle;text-align:center;" class="table-yes">16 per GPC </td></tr> <tr> <th rowspan="2">Architecture specifications </th> <th>1.0 </th> <th>1.1 </th> <th>1.2 </th> <th>1.3 </th> <th>2.0 </th> <th>2.1 </th> <th>3.0 </th> <th>3.2 </th> <th>3.5 </th> <th>3.7 </th> <th>5.0 </th> <th>5.2 </th> <th>5.3 </th> <th>6.0 </th> <th>6.1 </th> <th>6.2 </th> <th>7.0 </th> <th>7.2 </th> <th>7.5 </th> <th>8.0 </th> <th>8.6 </th> <th>8.7 </th> <th>8.9 </th> <th>9.0 </th> <th>10.x </th> <th>12.0 </th></tr> <tr> <th colspan="26">Compute capability (version) </th></tr></tbody></table> </div> <p>For more information read the Nvidia CUDA C++ Programming Guide.<sup id="cite_ref-115" class="reference"><a href="#cite_note-115"><span class="cite-bracket">&#91;</span>115<span class="cite-bracket">&#93;</span></a></sup> </p> <div class="mw-heading mw-heading2"><h2 id="Current_and_future_usages_of_CUDA_architecture">Current and future usages of CUDA architecture</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=15" title="Edit section: Current and future usages of CUDA architecture"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li>Accelerated 
<div class="mw-heading mw-heading2"><h2 id="Current_and_future_usages_of_CUDA_architecture">Current and future usages of CUDA architecture</h2></div>
<ul><li>Accelerated rendering of 3D graphics</li>
<li>Accelerated interconversion of video file formats</li>
<li>Accelerated <a href="/wiki/Encryption" title="Encryption">encryption</a>, <a href="/wiki/Decryption" title="Decryption">decryption</a> and <a href="/wiki/Data_compression" title="Data compression">compression</a></li>
<li><a href="/wiki/Bioinformatics" title="Bioinformatics">Bioinformatics</a>, e.g. BarraCUDA for <a href="/wiki/Massive_parallel_sequencing" title="Massive parallel sequencing">NGS</a> DNA sequencing<sup id="cite_ref-116" class="reference"><a href="#cite_note-116">[116]</a></sup></li>
<li>Distributed calculations, such as predicting the native conformation of <a href="/wiki/Proteins" title="Proteins">proteins</a></li>
<li>Medical analysis simulations, for example <a href="/wiki/Virtual_reality" title="Virtual reality">virtual reality</a> based on <a href="/wiki/X-ray_computed_tomography" title="X-ray computed tomography">CT</a> and <a href="/wiki/Magnetic_resonance_imaging" title="Magnetic resonance imaging">MRI</a> scan images</li>
<li>Physical simulations,<sup id="cite_ref-117" class="reference"><a href="#cite_note-117">[117]</a></sup> particularly in <a href="/wiki/Fluid_dynamics" title="Fluid dynamics">fluid dynamics</a></li>
<li><a href="/wiki/Neural_network" title="Neural network">Neural network</a> training in <a href="/wiki/Machine_learning" title="Machine learning">machine learning</a> problems</li>
<li><a href="/wiki/Large_Language_Model" title="Large Language Model">Large Language Model</a> inference</li>
<li><a href="/wiki/Face_recognition" title="Face recognition">Face recognition</a></li>
<li><a href="/wiki/Volunteer_computing" title="Volunteer computing">Volunteer computing</a> projects, such as <a href="/wiki/SETI@home" title="SETI@home">SETI@home</a> and other projects using <a href="/wiki/Berkeley_Open_Infrastructure_for_Network_Computing" title="Berkeley Open Infrastructure for Network Computing">BOINC</a> software</li>
<li><a href="/wiki/Molecular_dynamics" title="Molecular dynamics">Molecular dynamics</a></li>
<li>Mining <a href="/wiki/Cryptocurrencies" title="Cryptocurrencies">cryptocurrencies</a></li>
<li><a href="/wiki/Structure_from_motion" title="Structure from motion">Structure from motion</a> (SfM) software</li></ul>
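<p>Most of the workloads listed above map onto the same data-parallel pattern: one GPU thread is launched per data element. The following is a minimal, self-contained sketch of that pattern (a generic SAXPY kernel; the kernel name, variable names and problem size are arbitrary and are not drawn from any of the projects listed):</p>
<pre>
// Minimal sketch of the data-parallel offload pattern: y = a*x + y (SAXPY).
#include <cstdio>
#include <cuda_runtime.h>

__global__ void saxpy(int n, float a, const float *x, float *y)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;  // one element per thread
    if (i < n)
        y[i] = a * x[i] + y[i];
}

int main()
{
    const int n = 1 << 20;
    float *x, *y;
    cudaMallocManaged(&x, n * sizeof(float));  // unified memory keeps the sketch short
    cudaMallocManaged(&y, n * sizeof(float));
    for (int i = 0; i < n; ++i) { x[i] = 1.0f; y[i] = 2.0f; }

    int threads = 256;
    int blocks = (n + threads - 1) / threads;   // enough blocks to cover all n elements
    saxpy<<<blocks, threads>>>(n, 2.0f, x, y);
    cudaDeviceSynchronize();                    // wait for the GPU before reading y

    printf("y[0] = %f\n", y[0]);                // expected: 4.0
    cudaFree(x);
    cudaFree(y);
    return 0;
}
</pre>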
</p> <div class="mw-heading mw-heading3"><h3 id="Intel_OneAPI">Intel OneAPI</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=17" title="Edit section: Intel OneAPI"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236090951" /><div role="note" class="hatnote navigation-not-searchable">Main article: <a href="/wiki/OneAPI_(compute_acceleration)" title="OneAPI (compute acceleration)">OneAPI (compute acceleration)</a></div> <p><b>oneAPI</b> is an initiative based in open standards, created to support software development for multiple hardware architectures.<sup id="cite_ref-118" class="reference"><a href="#cite_note-118"><span class="cite-bracket">&#91;</span>118<span class="cite-bracket">&#93;</span></a></sup> The oneAPI libraries must implement open specifications that are discussed publicly by the Special Interest Groups, offering the possibility for any developer or organization to implement their own versions of oneAPI libraries.<sup id="cite_ref-119" class="reference"><a href="#cite_note-119"><span class="cite-bracket">&#91;</span>119<span class="cite-bracket">&#93;</span></a></sup><sup id="cite_ref-120" class="reference"><a href="#cite_note-120"><span class="cite-bracket">&#91;</span>120<span class="cite-bracket">&#93;</span></a></sup> </p><p>Originally made by Intel, other hardware adopters include Fujitsu and Huawei. </p> <div class="mw-heading mw-heading4"><h4 id="Unified_Acceleration_Foundation_(UXL)"><span id="Unified_Acceleration_Foundation_.28UXL.29"></span>Unified Acceleration Foundation (UXL)</h4><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=18" title="Edit section: Unified Acceleration Foundation (UXL)"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <p>Unified Acceleration Foundation (UXL) is a new technology consortium working on the continuation of the OneAPI initiative, with the goal to create a new open standard accelerator software ecosystem, related open standards and specification projects through Working Groups and Special Interest Groups (SIGs). The goal is to offer open alternatives to Nvidia's CUDA. 
The main companies behind it are Intel, Google, ARM, Qualcomm, Samsung, Imagination, and VMware.<sup id="cite_ref-121" class="reference"><a href="#cite_note-121"><span class="cite-bracket">&#91;</span>121<span class="cite-bracket">&#93;</span></a></sup> </p> <div class="mw-heading mw-heading3"><h3 id="AMD_ROCm">AMD ROCm</h3><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=19" title="Edit section: AMD ROCm"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236090951" /><div role="note" class="hatnote navigation-not-searchable">Main article: <a href="/wiki/ROCm" title="ROCm">ROCm</a></div> <p><b>ROCm</b><sup id="cite_ref-122" class="reference"><a href="#cite_note-122"><span class="cite-bracket">&#91;</span>122<span class="cite-bracket">&#93;</span></a></sup> is an open source software stack for <a href="/wiki/Graphics_processing_unit" title="Graphics processing unit">graphics processing unit</a> (GPU) programming from <a href="/wiki/Advanced_Micro_Devices" class="mw-redirect" title="Advanced Micro Devices">Advanced Micro Devices</a> (AMD). </p> <div class="mw-heading mw-heading2"><h2 id="See_also">See also</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=20" title="Edit section: See also"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><a href="/wiki/SYCL" title="SYCL">SYCL</a> – an open standard from <a href="/wiki/Khronos_Group" title="Khronos Group">Khronos Group</a> for programming a variety of platforms, including GPUs, with <i>single-source</i> modern C++, similar to higher-level CUDA <b>Runtime</b> API (<i>single-source</i>)</li> <li><a href="/wiki/BrookGPU" title="BrookGPU">BrookGPU</a> – the Stanford University graphics group's compiler</li> <li><a href="/wiki/Array_programming" title="Array programming">Array programming</a></li> <li><a href="/wiki/Parallel_computing" title="Parallel computing">Parallel computing</a></li> <li><a href="/wiki/Stream_processing" title="Stream processing">Stream processing</a></li> <li><a href="/wiki/RCUDA" title="RCUDA">rCUDA</a> – an API for computing on remote computers</li> <li><a href="/wiki/Molecular_modeling_on_GPUs" title="Molecular modeling on GPUs">Molecular modeling on GPUs</a></li> <li><a href="/wiki/Vulkan" title="Vulkan">Vulkan</a> – low-level, high-performance 3D graphics and computing API</li> <li><a href="/wiki/OptiX" title="OptiX">OptiX</a> – ray tracing API by NVIDIA</li> <li><a href="/wiki/CUDA_binary" class="mw-redirect" title="CUDA binary">CUDA binary</a> (cubin) – a type of fat binary</li> <li><a href="/wiki/Numerical_Library_Collection" class="mw-redirect" title="Numerical Library Collection">Numerical Library Collection</a> – by NEC for their vector processor</li></ul> <div class="mw-heading mw-heading2"><h2 id="References">References</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=21" title="Edit section: References"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <style data-mw-deduplicate="TemplateStyles:r1239543626">.mw-parser-output .reflist{margin-bottom:0.5em;list-style-type:decimal}@media screen{.mw-parser-output .reflist{font-size:90%}}.mw-parser-output .reflist 
<div class="reflist reflist-columns references-column-width" style="column-width: 30em;"> <ol class="references">
<li id="cite_note-1"><a href="https://web.archive.org/web/20070329144655/http://www.nvidia.com/object/IO_39918.html">"NVIDIA® CUDA™ Unleashes Power of GPU Computing - Press Release"</a>. <i>nvidia.com</i>. Archived from the original on 29 March 2007. Retrieved 26 January 2025.</li>
<li id="cite_note-:0-2">Shah, Agam. <a href="https://www.theregister.com/2021/11/10/nvidia_cuda_silicon/">"Nvidia not totally against third parties making CUDA chips"</a>. <i>www.theregister.com</i>. Retrieved 2024-04-25.</li>
<li id="cite_note-3"><a href="https://developer.nvidia.com/cuda-zone">"Nvidia CUDA Home Page"</a>. 18 July 2017.</li>
<li id="cite_note-CUDA_intro_-_AnandTech-4">Shimpi, Anand Lal; Wilson, Derek (November 8, 2006). <a href="https://www.anandtech.com/show/2116/8">"Nvidia's GeForce 8800 (G80): GPUs Re-architected for DirectX 10"</a>. AnandTech. Retrieved May 16, 2015.</li>
<li id="cite_note-5"><a href="https://docs.nvidia.com/nsight-visual-studio-edition/introduction/index.html#cuda-debugger">"Introduction — nsight-visual-studio-edition 12.6 documentation"</a>. <i>docs.nvidia.com</i>. Retrieved 2024-10-10.</li>
<li id="cite_note-CUDA_intro_-_TomsHardware-6">Abi-Chahla, Fedy (June 18, 2008). <a href="https://www.tomshardware.com/reviews/nvidia-cuda-gpu,1954.html">"Nvidia's CUDA: The End of the CPU?"</a>. Tom's Hardware. Retrieved May 17, 2015.</li>
<li id="cite_note-7">Zunitch, Peter (2018-01-24). <a href="https://www.videomaker.com/article/c15/19313-cuda-vs-opencl-vs-opengl">"CUDA vs. OpenCL vs. OpenGL"</a>. <i>Videomaker</i>. Retrieved 2018-09-16.</li>
<li id="cite_note-8"><a href="https://developer.nvidia.com/opencl">"OpenCL"</a>. <i>NVIDIA Developer</i>. 2013-04-24. Retrieved 2019-11-04.</li>
<li id="cite_note-9">Witt, Stephen (2023-11-27). <a href="https://www.newyorker.com/magazine/2023/12/04/how-jensen-huangs-nvidia-is-powering-the-ai-revolution">"How Jensen Huang's Nvidia Is Powering the A.I. Revolution"</a>. <i>The New Yorker</i>. ISSN 0028-792X. Retrieved 2023-12-10.</li>
<li id="cite_note-10"><a href="https://developer.nvidia.com/cuda-llvm-compiler">"CUDA LLVM Compiler"</a>. 7 May 2012.</li>
<li id="cite_note-11"><a href="https://www.youtube.com/watch?v=r1sN1ELJfNo">First OpenCL demo on a GPU</a> on YouTube.</li>
<li id="cite_note-12"><a href="https://www.youtube.com/watch?v=K1I4kts5mqc">DirectCompute Ocean Demo Running on Nvidia CUDA-enabled GPU</a> on YouTube.</li>
<li id="cite_note-Ioannidis08-13">Vasiliadis, Giorgos; Antonatos, Spiros; Polychronakis, Michalis; Markatos, Evangelos P.; Ioannidis, Sotiris (September 2008). <a href="http://www.ics.forth.gr/dcs/Activities/papers/gnort.raid08.pdf">"Gnort: High Performance Network Intrusion Detection Using Graphics Processors"</a> (PDF). <i>Recent Advances in Intrusion Detection</i>. Lecture Notes in Computer Science. Vol. 5230. pp. 116–134. doi:10.1007/978-3-540-87403-4_7. ISBN 978-3-540-87402-7.</li>
<li id="cite_note-14">Schatz, Michael C.; Trapnell, Cole; Delcher, Arthur L.; Varshney, Amitabh (2007). <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2222658">"High-throughput sequence alignment using Graphics Processing Units"</a>. <i>BMC Bioinformatics</i>. <b>8</b>: 474. doi:10.1186/1471-2105-8-474. PMC 2222658. PMID 18070356.</li>
<li id="cite_note-Manavski2008-15">Manavski, Svetlin A.; Giorgio, Valle (2008). <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2323659">"CUDA compatible GPU cards as efficient hardware accelerators for Smith-Waterman sequence alignment"</a>. <i>BMC Bioinformatics</i>. <b>10</b> (Suppl 2): S10. doi:10.1186/1471-2105-9-S2-S10. PMC 2323659. PMID 18387198.</li>
<li id="cite_note-16"><a href="https://code.google.com/p/pyrit/">"Pyrit – Google Code"</a>.</li>
<li id="cite_note-17"><a href="https://web.archive.org/web/20081228022142/http://boinc.berkeley.edu/cuda.php">"Use your Nvidia GPU for scientific computing"</a>. BOINC. 2008-12-18. Archived from the original on 2008-12-28. Retrieved 2017-08-08.</li>
<li id="cite_note-18"><a href="https://web.archive.org/web/20090106020401/http://developer.download.nvidia.com/compute/cuda/sdk/website/doc/CUDA_SDK_release_notes_macosx.txt">"Nvidia CUDA Software Development Kit (CUDA SDK) – Release Notes Version 2.0 for MAC OS X"</a>. Archived from the original on 2009-01-06.</li>
<li id="cite_note-19"><a href="https://web.archive.org/web/20081122105633/http://news.developer.nvidia.com/2008/02/cuda-11---now-o.html">"CUDA 1.1 – Now on Mac OS X"</a>. February 14, 2008. Archived from the original on November 22, 2008.</li>
<li id="cite_note-20"><a href="https://developer.nvidia.com/blog/cuda-11-features-revealed/">"CUDA 11 Features Revealed"</a>. 14 May 2020.</li>
<li id="cite_note-21"><a href="https://developer.nvidia.com/blog/cuda-11-1-introduces-support-rtx-30-series/">"CUDA Toolkit 11.1 Introduces Support for GeForce RTX 30 Series and Quadro RTX Series GPUs"</a>. 23 September 2020.</li>
<li id="cite_note-22"><a href="https://developer.nvidia.com/blog/enhancing-memory-allocation-with-new-cuda-11-2-features/">"Enhancing Memory Allocation with New NVIDIA CUDA 11.2 Features"</a>. 16 December 2020.</li>
<li id="cite_note-23"><a href="https://developer.nvidia.com/blog/exploring-the-new-features-of-cuda-11-3/">"Exploring the New Features of CUDA 11.3"</a>. 16 April 2021.</li>
<li id="cite_note-24">Silberstein, Mark; Schuster, Assaf; Geiger, Dan; Patney, Anjul; Owens, John D. (2008). <a href="https://escholarship.org/content/qt8js4v3f7/qt8js4v3f7.pdf?t=ptt3te">"Efficient computation of sum-products on GPUs through software-managed cache"</a> (PDF). <i>Proceedings of the 22nd annual international conference on Supercomputing – ICS '08</i>. pp. 309–318. doi:10.1145/1375527.1375572. ISBN 978-1-60558-158-3.</li>
<li id="cite_note-CUDA_Prog_v8-25"><a href="http://docs.nvidia.com/cuda/pdf/CUDA_C_Programming_Guide.pdf">"CUDA C Programming Guide v8.0"</a> (PDF). <i>nVidia Developer Zone</i>. January 2017. p. 19. Retrieved 22 March 2017.</li>
<li id="cite_note-26"><a href="https://devtalk.nvidia.com/default/topic/508479/cuda-programming-and-performance/nvcc-forces-c-compilation-of-cu-files/#entry1340190">"NVCC forces c++ compilation of .cu files"</a>. 29 November 2011.</li>
<li id="cite_note-27">Whitehead, Nathan; Fit-Florea, Alex. <a href="https://developer.nvidia.com/sites/default/files/akamai/cuda/files/NVIDIA-CUDA-Floating-Point.pdf">"Precision &amp; Performance: Floating Point and IEEE 754 Compliance for Nvidia GPUs"</a> (PDF). Nvidia. Retrieved November 18, 2014.</li>
<li id="cite_note-CUDA_products-28"><a href="https://www.nvidia.com/object/cuda_learn_products.html">"CUDA-Enabled Products"</a>. <i>CUDA Zone</i>. Nvidia Corporation. Retrieved 2008-11-03.</li>
<li id="cite_note-29"><a href="http://www.phoronix.com/scan.php?page=news_item&amp;px=CUDA-On-CL-Coriander">"Coriander Project: Compile CUDA Codes To OpenCL, Run Everywhere"</a>. Phoronix.</li>
<li id="cite_note-30">Perkins, Hugh (2017). <a href="http://www.iwocl.org/wp-content/uploads/iwocl2017-hugh-perkins-cuda-cl.pdf">"cuda-on-cl"</a> (PDF). IWOCL. Retrieved August 8, 2017.</li>
<li id="cite_note-31"><a href="https://github.com/hughperkins/coriander">"hughperkins/coriander: Build NVIDIA® CUDA™ code for OpenCL™ 1.2 devices"</a>. GitHub. May 6, 2019.</li>
<li id="cite_note-32"><a href="http://chrec.cs.vt.edu/cu2cl/documentation.php">"CU2CL Documentation"</a>. <i>chrec.cs.vt.edu</i>.</li>
<li id="cite_note-33"><a href="https://github.com/vosen/ZLUDA">"GitHub – vosen/ZLUDA"</a>. <i>GitHub</i>.</li>
<li id="cite_note-34">Larabel, Michael (2024-02-12). <a href="https://www.phoronix.com/review/radeon-cuda-zluda">"AMD Quietly Funded A Drop-In CUDA Implementation Built On ROCm: It's Now Open-Source"</a>. <i>Phoronix</i>. Retrieved 2024-02-12.</li>
<li id="cite_note-35"><a href="https://github.com/chip-spv/chipStar">"GitHub – chip-spv/chipStar"</a>. <i>GitHub</i>.</li>
<li id="cite_note-36"><a href="http://mathema.tician.de/software/pycuda">"PyCUDA"</a>.</li>
<li id="cite_note-37"><a href="https://web.archive.org/web/20090420124748/http://kered.org/blog/2009-04-13/easy-python-numpy-cuda-cublas/">"pycublas"</a>. Archived from the original on 2009-04-20. Retrieved 2017-08-08.</li>
<li id="cite_note-38"><a href="https://cupy.dev/">"CuPy"</a>. Retrieved 2020-01-08.</li>
<li id="cite_note-39"><a href="http://developer.download.nvidia.com/compute/cuda/1.0/NVIDIA_CUDA_Programming_Guide_1.0.pdf">"NVIDIA CUDA Programming Guide. Version 1.0"</a> (PDF). June 23, 2007.</li>
<li id="cite_note-40"><a href="http://developer.download.nvidia.com/compute/cuda/2_1/toolkit/docs/NVIDIA_CUDA_Programming_Guide_2.1.pdf">"NVIDIA CUDA Programming Guide. Version 2.1"</a> (PDF). December 8, 2008.</li>
<li id="cite_note-41"><a href="http://developer.download.nvidia.com/compute/cuda/2_2/toolkit/docs/NVIDIA_CUDA_Programming_Guide_2.2.pdf">"NVIDIA CUDA Programming Guide. Version 2.2"</a> (PDF). April 2, 2009.</li>
<li id="cite_note-42"><a href="http://developer.download.nvidia.com/compute/cuda/2_21/toolkit/docs/NVIDIA_CUDA_Programming_Guide_2.2.1.pdf">"NVIDIA CUDA Programming Guide. Version 2.2.1"</a> (PDF). May 26, 2009.</li>
<li id="cite_note-43"><a href="http://developer.download.nvidia.com/compute/cuda/2_3/toolkit/docs/NVIDIA_CUDA_Programming_Guide_2.3.pdf">"NVIDIA CUDA Programming Guide. Version 2.3.1"</a> (PDF). August 26, 2009.</li>
<li id="cite_note-44"><a href="http://developer.download.nvidia.com/compute/cuda/3_0/toolkit/docs/NVIDIA_CUDA_ProgrammingGuide.pdf">"NVIDIA CUDA Programming Guide. Version 3.0"</a> (PDF). February 20, 2010.</li>
<li id="cite_note-45"><a href="http://developer.download.nvidia.com/compute/cuda/3_1/toolkit/docs/NVIDIA_CUDA_C_ProgrammingGuide_3.1.pdf">"NVIDIA CUDA C Programming Guide. Version 3.1.1"</a> (PDF). July 21, 2010.</li>
<li id="cite_note-46"><a href="http://developer.download.nvidia.com/compute/cuda/3_2_prod/toolkit/docs/CUDA_C_Programming_Guide.pdf">"NVIDIA CUDA C Programming Guide. Version 3.2"</a> (PDF). November 9, 2010.</li>
<li id="cite_note-47"><a href="https://docs.nvidia.com/cuda/archive/11.0/cuda-toolkit-release-notes/index.html">"CUDA 11.0 Release Notes"</a>. <i>NVIDIA Developer</i>.</li>
<li id="cite_note-48"><span class="reference-text"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://docs.nvidia.com/cuda/archive/11.1.0/cuda-toolkit-release-notes/index.html">"CUDA 11.1 Release Notes"</a>.
<i>NVIDIA Developer</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=NVIDIA+Developer&amp;rft.atitle=CUDA+11.1+Release+Notes&amp;rft_id=https%3A%2F%2Fdocs.nvidia.com%2Fcuda%2Farchive%2F11.1.0%2Fcuda-toolkit-release-notes%2Findex.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-49"><span class="mw-cite-backlink"><b><a href="#cite_ref-49">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://docs.nvidia.com/cuda/archive/11.5.0/cuda-toolkit-release-notes/index.html">"CUDA 11.5 Release Notes"</a>. <i>NVIDIA Developer</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=NVIDIA+Developer&amp;rft.atitle=CUDA+11.5+Release+Notes&amp;rft_id=https%3A%2F%2Fdocs.nvidia.com%2Fcuda%2Farchive%2F11.5.0%2Fcuda-toolkit-release-notes%2Findex.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-50"><span class="mw-cite-backlink"><b><a href="#cite_ref-50">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://docs.nvidia.com/cuda/archive/11.8.0/cuda-toolkit-release-notes/index.html">"CUDA 11.8 Release Notes"</a>. <i>NVIDIA Developer</i>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=NVIDIA+Developer&amp;rft.atitle=CUDA+11.8+Release+Notes&amp;rft_id=https%3A%2F%2Fdocs.nvidia.com%2Fcuda%2Farchive%2F11.8.0%2Fcuda-toolkit-release-notes%2Findex.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-51"><span class="mw-cite-backlink"><b><a href="#cite_ref-51">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://www.techpowerup.com/gpu-specs/quadro-nvs-420.c1448">"NVIDIA Quadro NVS 420 Specs"</a>. <i>TechPowerUp GPU Database</i>. 25 August 2023.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=TechPowerUp+GPU+Database&amp;rft.atitle=NVIDIA+Quadro+NVS+420+Specs&amp;rft.date=2023-08-25&amp;rft_id=https%3A%2F%2Fwww.techpowerup.com%2Fgpu-specs%2Fquadro-nvs-420.c1448&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-52"><span class="mw-cite-backlink"><b><a href="#cite_ref-52">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFLarabel2017" class="citation web cs1"><a href="/wiki/Michael_Larabel" class="mw-redirect" title="Michael Larabel">Larabel, Michael</a> (March 29, 2017). <a rel="nofollow" class="external text" href="http://www.phoronix.com/scan.php?page=news_item&amp;px=Tegra-X2-Nouveau-Support">"NVIDIA Rolls Out Tegra X2 GPU Support In Nouveau"</a>. <a href="/wiki/Phoronix" class="mw-redirect" title="Phoronix">Phoronix</a><span class="reference-accessdate">. 
Retrieved <span class="nowrap">August 8,</span> 2017</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=NVIDIA+Rolls+Out+Tegra+X2+GPU+Support+In+Nouveau&amp;rft.pub=Phoronix&amp;rft.date=2017-03-29&amp;rft.aulast=Larabel&amp;rft.aufirst=Michael&amp;rft_id=http%3A%2F%2Fwww.phoronix.com%2Fscan.php%3Fpage%3Dnews_item%26px%3DTegra-X2-Nouveau-Support&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-53"><span class="mw-cite-backlink"><b><a href="#cite_ref-53">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" href="https://www.techpowerup.com/gpudb/3232/xavier">Nvidia Xavier Specs</a> on TechPowerUp (preliminary)</span> </li> <li id="cite_note-54"><span class="mw-cite-backlink"><b><a href="#cite_ref-54">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://docs.nvidia.com/jetson/l4t/index.html#page/Tegra%20Linux%20Driver%20Package%20Development%20Guide/power_management_jetson_xavier.html">"Welcome — Jetson LinuxDeveloper Guide 34.1 documentation"</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=Welcome+%E2%80%94+Jetson+LinuxDeveloper+Guide+34.1+documentation&amp;rft_id=https%3A%2F%2Fdocs.nvidia.com%2Fjetson%2Fl4t%2Findex.html%23page%2FTegra%2520Linux%2520Driver%2520Package%2520Development%2520Guide%2Fpower_management_jetson_xavier.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-55"><span class="mw-cite-backlink"><b><a href="#cite_ref-55">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://www.phoronix.com/scan.php?page=news_item&amp;px=NVIDIA-Nouveau-GV11B-Volta-Xav">"NVIDIA Bringing up Open-Source Volta GPU Support for Their Xavier SoC"</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=NVIDIA+Bringing+up+Open-Source+Volta+GPU+Support+for+Their+Xavier+SoC&amp;rft_id=https%3A%2F%2Fwww.phoronix.com%2Fscan.php%3Fpage%3Dnews_item%26px%3DNVIDIA-Nouveau-GV11B-Volta-Xav&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-56"><span class="mw-cite-backlink"><b><a href="#cite_ref-56">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://www.nvidia.com/en-us/geforce/ada-lovelace-architecture/">"NVIDIA Ada Lovelace Architecture"</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=NVIDIA+Ada+Lovelace+Architecture&amp;rft_id=https%3A%2F%2Fwww.nvidia.com%2Fen-us%2Fgeforce%2Fada-lovelace-architecture%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-57"><span class="mw-cite-backlink"><b><a href="#cite_ref-57">^</a></b></span> <span class="reference-text"><a rel="nofollow" class="external text" 
href="https://developer.download.nvidia.com/video/gputechconf/gtc/2019/presentation/s9839-discovering-the-turing-t4-gpu-architecture-with-microbenchmarks.pdf">Dissecting the Turing GPU Architecture through Microbenchmarking</a></span> </li> <li id="cite_note-58"><span class="mw-cite-backlink"><b><a href="#cite_ref-58">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications">"H.1. Features and Technical Specifications&#160;&#8211;&#32; Table 13. Feature Support per Compute Capability"</a>. <i>docs.nvidia.com</i><span class="reference-accessdate">. Retrieved <span class="nowrap">2020-09-23</span></span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=docs.nvidia.com&amp;rft.atitle=H.1.+Features+and+Technical+Specifications+%26ndash%3B%26%2332%3B+Table+13.+Feature+Support+per+Compute+Capability&amp;rft_id=https%3A%2F%2Fdocs.nvidia.com%2Fcuda%2Fcuda-c-programming-guide%2Findex.html%23features-and-technical-specifications&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-59"><span class="mw-cite-backlink"><b><a href="#cite_ref-59">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications">"CUDA C++ Programming Guide"</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=CUDA+C%2B%2B+Programming+Guide&amp;rft_id=https%3A%2F%2Fdocs.nvidia.com%2Fcuda%2Fcuda-c-programming-guide%2Findex.html%23features-and-technical-specifications&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-60"><span class="mw-cite-backlink"><b><a href="#cite_ref-60">^</a></b></span> <span class="reference-text">Fused-Multiply-Add, actually executed, Dense Matrix</span> </li> <li id="cite_note-61"><span class="mw-cite-backlink"><b><a href="#cite_ref-61">^</a></b></span> <span class="reference-text">as SASS since 7.5, as PTX since 8.0</span> </li> <li id="cite_note-unofficial_support_in_SASS-62"><span class="mw-cite-backlink">^ <a href="#cite_ref-unofficial_support_in_SASS_62-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-unofficial_support_in_SASS_62-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text">unofficial support in SASS</span> </li> <li id="cite_note-63"><span class="mw-cite-backlink"><b><a href="#cite_ref-63">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://www.nvidia.com/content/dam/en-zz/Solutions/gtcf21/jetson-orin/nvidia-jetson-agx-orin-technical-brief.pdf">"Technical brief. NVIDIA Jetson AGX Orin Series"</a> <span class="cs1-format">(PDF)</span>. <i>nvidia.com</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">5 September</span> 2023</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=nvidia.com&amp;rft.atitle=Technical+brief.+NVIDIA+Jetson+AGX+Orin+Series&amp;rft_id=https%3A%2F%2Fwww.nvidia.com%2Fcontent%2Fdam%2Fen-zz%2FSolutions%2Fgtcf21%2Fjetson-orin%2Fnvidia-jetson-agx-orin-technical-brief.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-64"><span class="mw-cite-backlink"><b><a href="#cite_ref-64">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://images.nvidia.com/aem-dam/en-zz/Solutions/geforce/ampere/pdf/NVIDIA-ampere-GA102-GPU-Architecture-Whitepaper-V1.pdf">"NVIDIA Ampere GA102 GPU Architecture"</a> <span class="cs1-format">(PDF)</span>. <i>nvidia.com</i><span class="reference-accessdate">. Retrieved <span class="nowrap">5 September</span> 2023</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=nvidia.com&amp;rft.atitle=NVIDIA+Ampere+GA102+GPU+Architecture&amp;rft_id=https%3A%2F%2Fimages.nvidia.com%2Faem-dam%2Fen-zz%2FSolutions%2Fgeforce%2Fampere%2Fpdf%2FNVIDIA-ampere-GA102-GPU-Architecture-Whitepaper-V1.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-65"><span class="mw-cite-backlink"><b><a href="#cite_ref-65">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFLuoFanLiDu2024" class="citation arxiv cs1">Luo, Weile; Fan, Ruibo; Li, Zeyu; Du, Dayou; Wang, Qiang; Chu, Xiaowen (2024). "Benchmarking and Dissecting the Nvidia Hopper GPU Architecture". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="id-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2402.13499v1">2402.13499v1</a></span> [<a rel="nofollow" class="external text" href="https://arxiv.org/archive/cs.AR">cs.AR</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Benchmarking+and+Dissecting+the+Nvidia+Hopper+GPU+Architecture&amp;rft.date=2024&amp;rft_id=info%3Aarxiv%2F2402.13499v1&amp;rft.aulast=Luo&amp;rft.aufirst=Weile&amp;rft.au=Fan%2C+Ruibo&amp;rft.au=Li%2C+Zeyu&amp;rft.au=Du%2C+Dayou&amp;rft.au=Wang%2C+Qiang&amp;rft.au=Chu%2C+Xiaowen&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-66"><span class="mw-cite-backlink"><b><a href="#cite_ref-66">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://images.nvidia.com/content/Solutions/data-center/a40/nvidia-a40-datasheet.pdf">"Datasheet NVIDIA A40"</a> <span class="cs1-format">(PDF)</span>. <i>nvidia.com</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">27 April</span> 2024</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=nvidia.com&amp;rft.atitle=Datasheet+NVIDIA+A40&amp;rft_id=https%3A%2F%2Fimages.nvidia.com%2Fcontent%2FSolutions%2Fdata-center%2Fa40%2Fnvidia-a40-datasheet.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-67"><span class="mw-cite-backlink"><b><a href="#cite_ref-67">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://www.nvidia.com/content/PDF/nvidia-ampere-ga-102-gpu-architecture-whitepaper-v2.1.pdf">"NVIDIA AMPERE GA102 GPU ARCHITECTURE"</a> <span class="cs1-format">(PDF)</span>. 27 April 2024.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=NVIDIA+AMPERE+GA102+GPU+ARCHITECTURE&amp;rft.date=2024-04-27&amp;rft_id=https%3A%2F%2Fwww.nvidia.com%2Fcontent%2FPDF%2Fnvidia-ampere-ga-102-gpu-architecture-whitepaper-v2.1.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-68"><span class="mw-cite-backlink"><b><a href="#cite_ref-68">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://www.nvidia.com/content/dam/en-zz/Solutions/design-visualization/support-guide/NVIDIA-L40-Datasheet-January-2023.pdf">"Datasheet NVIDIA L40"</a> <span class="cs1-format">(PDF)</span>. 27 April 2024.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=Datasheet+NVIDIA+L40&amp;rft.date=2024-04-27&amp;rft_id=https%3A%2F%2Fwww.nvidia.com%2Fcontent%2Fdam%2Fen-zz%2FSolutions%2Fdesign-visualization%2Fsupport-guide%2FNVIDIA-L40-Datasheet-January-2023.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-69"><span class="mw-cite-backlink"><b><a href="#cite_ref-69">^</a></b></span> <span class="reference-text">In the Whitepapers the Tensor Core cube diagrams represent the Dot Product Unit Width into the height (4 FP16 for Volta and Turing, 8 FP16 for A100, 4 FP16 for GA102, 16 FP16 for GH100). The other two dimensions represent the number of Dot Product Units (4x4 = 16 for Volta and Turing, 8x4 = 32 for Ampere and Hopper). The resulting gray blocks are the FP16 FMA operations per cycle. Pascal without Tensor core is only shown for speed comparison as is Volta V100 with non-FP16 datatypes.</span> </li> <li id="cite_note-70"><span class="mw-cite-backlink"><b><a href="#cite_ref-70">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://images.nvidia.com/aem-dam/en-zz/Solutions/design-visualization/technologies/turing-architecture/NVIDIA-Turing-Architecture-Whitepaper.pdf">"NVIDIA Turing Architecture Whitepaper"</a> <span class="cs1-format">(PDF)</span>. <i>nvidia.com</i><span class="reference-accessdate">. 
Retrieved <span class="nowrap">5 September</span> 2023</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=nvidia.com&amp;rft.atitle=NVIDIA+Turing+Architecture+Whitepaper&amp;rft_id=https%3A%2F%2Fimages.nvidia.com%2Faem-dam%2Fen-zz%2FSolutions%2Fdesign-visualization%2Ftechnologies%2Fturing-architecture%2FNVIDIA-Turing-Architecture-Whitepaper.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-71"><span class="mw-cite-backlink"><b><a href="#cite_ref-71">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf">"NVIDIA Tensor Core GPU"</a> <span class="cs1-format">(PDF)</span>. <i>nvidia.com</i><span class="reference-accessdate">. Retrieved <span class="nowrap">5 September</span> 2023</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=nvidia.com&amp;rft.atitle=NVIDIA+Tensor+Core+GPU&amp;rft_id=https%3A%2F%2Fwww.nvidia.com%2Fcontent%2Fdam%2Fen-zz%2FSolutions%2FData-Center%2Fa100%2Fpdf%2Fnvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-72"><span class="mw-cite-backlink"><b><a href="#cite_ref-72">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://developer.nvidia.com/blog/nvidia-hopper-architecture-in-depth/">"NVIDIA Hopper Architecture In-Depth"</a>. 22 March 2022.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=NVIDIA+Hopper+Architecture+In-Depth&amp;rft.date=2022-03-22&amp;rft_id=https%3A%2F%2Fdeveloper.nvidia.com%2Fblog%2Fnvidia-hopper-architecture-in-depth%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> <li id="cite_note-ReferenceC-73"><span class="mw-cite-backlink">^ <a href="#cite_ref-ReferenceC_73-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-ReferenceC_73-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text">shape x converted operand size, e.g. 2 tensor cores x 4x4x4xFP16/cycle = 256 Bytes/cycle</span> </li> <li id="cite_note-product_first_3_table_rows-74"><span class="mw-cite-backlink">^ <a href="#cite_ref-product_first_3_table_rows_74-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-product_first_3_table_rows_74-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text">= product first 3 table rows</span> </li> <li id="cite_note-ReferenceD-75"><span class="mw-cite-backlink">^ <a href="#cite_ref-ReferenceD_75-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-ReferenceD_75-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text">= product of previous 2 table rows; shape: e.g. 
76. Sun, Wei; Li, Ang; Geng, Tong; Stuijk, Sander; Corporaal, Henk (2023). "Dissecting Tensor Cores via Microbenchmarks: Latency, Throughput and Numeric Behaviors". IEEE Transactions on Parallel and Distributed Systems. 34 (1): 246–261. arXiv:2206.02874. doi:10.1109/tpds.2022.3217824. S2CID 249431357.
77. "Parallel Thread Execution ISA Version 7.7". https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-instructions-mma
78. Raihan, Md Aamir; Goli, Negar; Aamodt, Tor (2018). "Modeling Deep Learning Accelerator Enabled GPUs". arXiv:1811.08309 [cs.MS].
79. "NVIDIA Ada Lovelace Architecture". https://www.nvidia.com/en-gb/geforce/ada-lovelace-architecture
80. Jia, Zhe; Maggioni, Marco; Smith, Jeffrey; Scarpazza, Daniele Paolo (2019). "Dissecting the NVidia Turing T4 GPU via Microbenchmarking". arXiv:1903.07486 [cs.DC].
81. Burgess, John (2019). "RTX ON – The NVIDIA TURING GPU". 2019 IEEE Hot Chips 31 Symposium (HCS). pp. 1–27. doi:10.1109/HOTCHIPS.2019.8875651. ISBN 978-1-7281-2089-8. S2CID 204822166. https://ieeexplore.ieee.org/document/8875651
82. Burgess, John (2019). "RTX ON – The NVIDIA TURING GPU". 2019 IEEE Hot Chips 31 Symposium (HCS). pp. 1–27. doi:10.1109/HOTCHIPS.2019.8875651. ISBN 978-1-7281-2089-8. S2CID 204822166.
83. Dependent on device.
84. "Tegra X1". 9 January 2015. https://developer.nvidia.com/content/tegra-x1
85. NVIDIA H100 Tensor Core GPU Architecture. https://nvdam.widen.net/s/5bx55xfnxf/gtc22-whitepaper-hopper
86. H.1. Features and Technical Specifications – Table 14. Technical Specifications per Compute Capability. https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications
87. NVIDIA Hopper Architecture In-Depth. https://developer.nvidia.com/blog/nvidia-hopper-architecture-in-depth
88. Can only execute 160 integer instructions according to the programming guide.
89. 128 according to the CUDA C++ Programming Guide (https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#arithmetic-instructions); 64 from FP32 + 64 separate units?
90. 64 by FP32 cores and 64 by flexible FP32/INT cores.
91. "CUDA C++ Programming Guide". https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#arithmetic-instructions
92. 32 FP32 lanes combine to 16 FP64 lanes; may be lower depending on model.
93. Only supported by 16 FP32 lanes; they combine to 4 FP64 lanes.
94. Depending on model.
95. Effective speed, probably over FP32 ports. No description of actual FP64 cores.
96. Can also be used for integer additions and comparisons.
97. 2 clock cycles/instruction for each SM partition. Burgess, John (2019). "RTX ON – The NVIDIA TURING GPU". 2019 IEEE Hot Chips 31 Symposium (HCS). pp. 1–27. doi:10.1109/HOTCHIPS.2019.8875651. ISBN 978-1-7281-2089-8. S2CID 204822166.
98. Durant, Luke; Giroux, Olivier; Harris, Mark; Stam, Nick (May 10, 2017). "Inside Volta: The World's Most Advanced Data Center GPU". Nvidia developer blog. https://devblogs.nvidia.com/inside-volta/
99. The schedulers and dispatchers have dedicated execution units, unlike with Fermi and Kepler.
100. Dispatching can overlap concurrently if it takes more than one cycle (when there are fewer execution units than 32 per SM partition).
101. Can dual-issue to the MAD pipe and the SFU pipe.
102. No more than one scheduler can issue 2 instructions at once; the first scheduler is in charge of warps with odd IDs and the second scheduler is in charge of warps with even IDs.
103. Shared memory only, no data cache.
104. Shared memory separate, but L1 includes texture cache.
105. "H.6.1. Architecture". docs.nvidia.com. Retrieved 2019-05-13. https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#architecture-7-x
106. "Demystifying GPU Microarchitecture through Microbenchmarking" (PDF). https://www.stuffedcow.net/files/gpuarch-ispass2010.pdf
107. Jia, Zhe; Maggioni, Marco; Staiger, Benjamin; Scarpazza, Daniele P. (2018). "Dissecting the NVIDIA Volta GPU Architecture via Microbenchmarking". arXiv:1804.06826 [cs.DC].
108. Jia, Zhe; Maggioni, Marco; Smith, Jeffrey; Scarpazza, Daniele Paolo (2019). "Dissecting the NVidia Turing T4 GPU via Microbenchmarking". arXiv:1903.07486 [cs.DC].
109. "Dissecting the Ampere GPU Architecture through Microbenchmarking". https://www.nvidia.com/en-us/on-demand/session/gtcspring21-s33322/
110. Note that Jia et al. (2019), "Dissecting the NVidia Turing T4 GPU via Microbenchmarking" (arXiv:1903.07486 [cs.DC]), disagrees and states 2 KiB L0 instruction cache per SM partition and 16 KiB L1 instruction cache per SM.
111. "asfermi Opcode". GitHub. https://github.com/hyqneuron/asfermi/wiki/Opcode
112. For access with texture engine only.
113. 25% disabled on RTX 4060, RTX 4070, RTX 4070 Ti and RTX 4090.
114. 25% disabled on RTX 5070 Ti and RTX 5090.
115. "CUDA C++ Programming Guide, Compute Capabilities". docs.nvidia.com. Retrieved 2025-02-06. https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities
116. "nVidia CUDA Bioinformatics: BarraCUDA". BioCentric. 2019-07-19. Retrieved 2019-10-15. https://www.biocentric.nl/biocentric/nvidia-cuda-bioinformatics-barracuda/
117. "Part V: Physics Simulation". NVIDIA Developer. Retrieved 2020-09-11. https://developer.nvidia.com/gpugems/gpugems3/part-v-physics-simulation
118. "oneAPI Programming Model". oneAPI.io. Retrieved 2024-07-27. https://www.oneapi.io/
119. "Specifications | oneAPI". oneAPI.io. Retrieved 2024-07-27. https://www.oneapi.io/spec/
120. "oneAPI Specification — oneAPI Specification 1.3-rev-1 documentation". oneapi-spec.uxlfoundation.org. Retrieved 2024-07-27. https://oneapi-spec.uxlfoundation.org/specifications/oneapi/v1.3-rev-1/
121. "Exclusive: Behind the plot to break Nvidia's grip on AI by targeting software". Reuters. Retrieved 2024-04-05. https://www.reuters.com/technology/behind-plot-break-nvidias-grip-ai-by-targeting-software-2024-03-25/
122. "Question: What does ROCm stand for? · Issue #1628 · RadeonOpenCompute/ROCm". Github.com. Retrieved January 18, 2022. https://github.com/RadeonOpenCompute/ROCm/issues/1628
Retrieved <span class="nowrap">January 18,</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=Github.com&amp;rft.atitle=Question%3A+What+does+ROCm+stand+for%3F+%C2%B7+Issue+%231628+%C2%B7+RadeonOpenCompute%2FROCm&amp;rft_id=https%3A%2F%2Fgithub.com%2FRadeonOpenCompute%2FROCm%2Fissues%2F1628&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></span> </li> </ol></div> <div class="mw-heading mw-heading2"><h2 id="Further_reading">Further reading</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=22" title="Edit section: Further reading"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFBuckFoleyHornSugerman2004" class="citation journal cs1">Buck, Ian; Foley, Tim; Horn, Daniel; Sugerman, Jeremy; Fatahalian, Kayvon; Houston, Mike; Hanrahan, Pat (2004-08-01). <a rel="nofollow" class="external text" href="https://dl.acm.org/doi/10.1145/1015706.1015800">"Brook for GPUs: stream computing on graphics hardware"</a>. <i>ACM Transactions on Graphics</i>. <b>23</b> (3): <span class="nowrap">777–</span>786. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1145%2F1015706.1015800">10.1145/1015706.1015800</a>. <a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a>&#160;<a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/0730-0301">0730-0301</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=ACM+Transactions+on+Graphics&amp;rft.atitle=Brook+for+GPUs%3A+stream+computing+on+graphics+hardware&amp;rft.volume=23&amp;rft.issue=3&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E777-%3C%2Fspan%3E786&amp;rft.date=2004-08-01&amp;rft_id=info%3Adoi%2F10.1145%2F1015706.1015800&amp;rft.issn=0730-0301&amp;rft.aulast=Buck&amp;rft.aufirst=Ian&amp;rft.au=Foley%2C+Tim&amp;rft.au=Horn%2C+Daniel&amp;rft.au=Sugerman%2C+Jeremy&amp;rft.au=Fatahalian%2C+Kayvon&amp;rft.au=Houston%2C+Mike&amp;rft.au=Hanrahan%2C+Pat&amp;rft_id=https%3A%2F%2Fdl.acm.org%2Fdoi%2F10.1145%2F1015706.1015800&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></li> <li><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1238218222" /><cite id="CITEREFNickollsBuckGarlandSkadron2008" class="citation journal cs1">Nickolls, John; Buck, Ian; Garland, Michael; Skadron, Kevin (2008-03-01). <a rel="nofollow" class="external text" href="https://dl.acm.org/doi/10.1145/1365490.1365500">"Scalable Parallel Programming with CUDA: Is CUDA the parallel programming model that application developers have been waiting for?"</a>. <i>Queue</i>. <b>6</b> (2): <span class="nowrap">40–</span>53. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1145%2F1365490.1365500">10.1145/1365490.1365500</a>. 
<a href="/wiki/ISSN_(identifier)" class="mw-redirect" title="ISSN (identifier)">ISSN</a>&#160;<a rel="nofollow" class="external text" href="https://search.worldcat.org/issn/1542-7730">1542-7730</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Queue&amp;rft.atitle=Scalable+Parallel+Programming+with+CUDA%3A+Is+CUDA+the+parallel+programming+model+that+application+developers+have+been+waiting+for%3F&amp;rft.volume=6&amp;rft.issue=2&amp;rft.pages=%3Cspan+class%3D%22nowrap%22%3E40-%3C%2Fspan%3E53&amp;rft.date=2008-03-01&amp;rft_id=info%3Adoi%2F10.1145%2F1365490.1365500&amp;rft.issn=1542-7730&amp;rft.aulast=Nickolls&amp;rft.aufirst=John&amp;rft.au=Buck%2C+Ian&amp;rft.au=Garland%2C+Michael&amp;rft.au=Skadron%2C+Kevin&amp;rft_id=https%3A%2F%2Fdl.acm.org%2Fdoi%2F10.1145%2F1365490.1365500&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ACUDA" class="Z3988"></span></li></ul> <div class="mw-heading mw-heading2"><h2 id="External_links">External links</h2><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=CUDA&amp;action=edit&amp;section=23" title="Edit section: External links"><span>edit</span></a><span class="mw-editsection-bracket">]</span></span></div> <ul><li><span class="official-website"><span class="url"><a rel="nofollow" class="external text" href="https://developer.nvidia.com/cuda-zone">Official website</a></span></span> <span class="mw-valign-text-top" typeof="mw:File/Frameless"><a href="https://www.wikidata.org/wiki/Q477690#P856" title="Edit this at Wikidata"><img alt="Edit this at Wikidata" src="//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png" decoding="async" width="10" height="10" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/15px-OOjs_UI_icon_edit-ltr-progressive.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/20px-OOjs_UI_icon_edit-ltr-progressive.svg.png 2x" data-file-width="20" data-file-height="20" /></a></span></li></ul> <div class="navbox-styles"><style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist 
href="/wiki/Electronic_circuit" title="Electronic circuit">Circuitry</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Integrated_circuit" title="Integrated circuit">Integrated circuit</a> <ul><li><a href="/wiki/Three-dimensional_integrated_circuit" title="Three-dimensional integrated circuit">3D</a></li> <li><a href="/wiki/Mixed-signal_integrated_circuit" title="Mixed-signal integrated circuit">Mixed-signal</a></li> <li><a href="/wiki/Power_management_integrated_circuit" title="Power management integrated circuit">Power management</a></li></ul></li> <li><a href="/wiki/Boolean_circuit" title="Boolean circuit">Boolean</a></li> <li><a href="/wiki/Circuit_(computer_science)" title="Circuit (computer science)">Digital</a></li> <li><a href="/wiki/Analogue_electronics" title="Analogue electronics">Analog</a></li> <li><a href="/wiki/Quantum_circuit" title="Quantum circuit">Quantum</a></li> <li><a href="/wiki/Switch#Electronic_switches" title="Switch">Switch</a></li></ul> </div></td></tr></tbody></table><div> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Power_management" title="Power management">Power<br />management</a></th><td class="navbox-list-with-group navbox-list navbox-even hlist" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Power_Management_Unit" title="Power Management Unit">PMU</a></li> <li><a href="/wiki/Advanced_Power_Management" title="Advanced Power Management">APM</a></li> <li><a href="/wiki/ACPI" title="ACPI">ACPI</a></li> <li><a href="/wiki/Dynamic_frequency_scaling" title="Dynamic frequency scaling">Dynamic frequency scaling</a></li> <li><a href="/wiki/Dynamic_voltage_scaling" title="Dynamic voltage scaling">Dynamic voltage scaling</a></li> <li><a href="/wiki/Clock_gating" title="Clock gating">Clock gating</a></li> <li><a href="/wiki/Performance_per_watt" title="Performance per watt">Performance per watt</a> (PPW)</li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Related</th><td class="navbox-list-with-group navbox-list navbox-odd hlist" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/History_of_general-purpose_CPUs" title="History of general-purpose CPUs">History of general-purpose CPUs</a></li> <li><a href="/wiki/Microprocessor_chronology" title="Microprocessor chronology">Microprocessor chronology</a></li> <li><a href="/wiki/Processor_design" title="Processor design">Processor design</a></li> <li><a href="/wiki/Digital_electronics" title="Digital electronics">Digital electronics</a></li> <li><a href="/wiki/Hardware_security_module" title="Hardware security module">Hardware security module</a></li> <li><a href="/wiki/Semiconductor_device_fabrication" title="Semiconductor device fabrication">Semiconductor device fabrication</a></li> <li><a href="/wiki/Tick%E2%80%93tock_model" title="Tick–tock model">Tick–tock model</a></li> <li><a href="/wiki/Pin_grid_array" title="Pin grid array">Pin grid array</a></li> <li><a href="/wiki/Chip_carrier" title="Chip carrier">Chip carrier</a></li></ul> </div></td></tr></tbody></table></div> <div class="navbox-styles"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236075235" /></div><div role="navigation" class="navbox" aria-labelledby="Parallel_computing346" style="padding:3px"><table 
class="nowraplinks hlist mw-collapsible autocollapse navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1239400231" /><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Parallel_computing" title="Template:Parallel computing"><abbr title="View this template">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Parallel_computing" title="Template talk:Parallel computing"><abbr title="Discuss this template">t</abbr></a></li><li class="nv-edit"><a href="/wiki/Special:EditPage/Template:Parallel_computing" title="Special:EditPage/Template:Parallel computing"><abbr title="Edit this template">e</abbr></a></li></ul></div><div id="Parallel_computing346" style="font-size:114%;margin:0 4em"><a href="/wiki/Parallel_computing" title="Parallel computing">Parallel computing</a></div></th></tr><tr><th scope="row" class="navbox-group" style="width:1%">General</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Distributed_computing" title="Distributed computing">Distributed computing</a></li> <li><a href="/wiki/Parallel_computing" title="Parallel computing">Parallel computing</a></li> <li><a href="/wiki/Massively_parallel" title="Massively parallel">Massively parallel</a></li> <li><a href="/wiki/Cloud_computing" title="Cloud computing">Cloud computing</a></li> <li><a href="/wiki/High-performance_computing" title="High-performance computing">High-performance computing</a></li> <li><a href="/wiki/Multiprocessing" title="Multiprocessing">Multiprocessing</a></li> <li><a href="/wiki/Manycore_processor" title="Manycore processor">Manycore processor</a></li> <li><a href="/wiki/General-purpose_computing_on_graphics_processing_units" title="General-purpose computing on graphics processing units">GPGPU</a></li> <li><a href="/wiki/Computer_network" title="Computer network">Computer network</a></li> <li><a href="/wiki/Systolic_array" title="Systolic array">Systolic array</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Levels</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Bit-level_parallelism" title="Bit-level parallelism">Bit</a></li> <li><a href="/wiki/Instruction-level_parallelism" title="Instruction-level parallelism">Instruction</a></li> <li><a href="/wiki/Task_parallelism" title="Task parallelism">Thread</a></li> <li><a href="/wiki/Task_parallelism" title="Task parallelism">Task</a></li> <li><a href="/wiki/Data_parallelism" title="Data parallelism">Data</a></li> <li><a href="/wiki/Memory-level_parallelism" title="Memory-level parallelism">Memory</a></li> <li><a href="/wiki/Loop-level_parallelism" title="Loop-level parallelism">Loop</a></li> <li><a href="/wiki/Pipeline_(computing)" title="Pipeline (computing)">Pipeline</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Multithreading_(computer_architecture)" title="Multithreading (computer architecture)">Multithreading</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Temporal_multithreading" 
title="Temporal multithreading">Temporal</a></li> <li><a href="/wiki/Simultaneous_multithreading" title="Simultaneous multithreading">Simultaneous</a> (SMT)</li> <li><a href="/wiki/Simultaneous_and_heterogeneous_multithreading" title="Simultaneous and heterogeneous multithreading">Simultaneous and heterogenous</a></li> <li><a href="/wiki/Speculative_multithreading" title="Speculative multithreading">Speculative</a> (SpMT)</li> <li><a href="/wiki/Preemption_(computing)" title="Preemption (computing)">Preemptive</a></li> <li><a href="/wiki/Computer_multitasking#Cooperative_multitasking" title="Computer multitasking">Cooperative</a></li> <li><a href="/wiki/Bulldozer_(microarchitecture)#Bulldozer_core" title="Bulldozer (microarchitecture)">Clustered multi-thread</a> (CMT)</li> <li><a href="/wiki/Hardware_scout" title="Hardware scout">Hardware scout</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Theory</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Parallel_RAM" title="Parallel RAM">PRAM model</a></li> <li><a href="/wiki/Parallel_external_memory" title="Parallel external memory">PEM model</a></li> <li><a href="/wiki/Analysis_of_parallel_algorithms" title="Analysis of parallel algorithms">Analysis of parallel algorithms</a></li> <li><a href="/wiki/Amdahl%27s_law" title="Amdahl&#39;s law">Amdahl's law</a></li> <li><a href="/wiki/Gustafson%27s_law" title="Gustafson&#39;s law">Gustafson's law</a></li> <li><a href="/wiki/Cost_efficiency" title="Cost efficiency">Cost efficiency</a></li> <li><a href="/wiki/Karp%E2%80%93Flatt_metric" title="Karp–Flatt metric">Karp–Flatt metric</a></li> <li><a href="/wiki/Parallel_slowdown" title="Parallel slowdown">Slowdown</a></li> <li><a href="/wiki/Speedup" title="Speedup">Speedup</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Elements</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Process_(computing)" title="Process (computing)">Process</a></li> <li><a href="/wiki/Thread_(computing)" title="Thread (computing)">Thread</a></li> <li><a href="/wiki/Fiber_(computer_science)" title="Fiber (computer science)">Fiber</a></li> <li><a href="/wiki/Instruction_window" title="Instruction window">Instruction window</a></li> <li><a href="/wiki/Array_(data_structure)" title="Array (data structure)">Array</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Coordination</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Multiprocessing" title="Multiprocessing">Multiprocessing</a></li> <li><a href="/wiki/Memory_coherence" title="Memory coherence">Memory coherence</a></li> <li><a href="/wiki/Cache_coherence" title="Cache coherence">Cache coherence</a></li> <li><a href="/wiki/Cache_invalidation" title="Cache invalidation">Cache invalidation</a></li> <li><a href="/wiki/Barrier_(computer_science)" title="Barrier (computer science)">Barrier</a></li> <li><a href="/wiki/Synchronization_(computer_science)" title="Synchronization (computer science)">Synchronization</a></li> <li><a href="/wiki/Application_checkpointing" title="Application checkpointing">Application checkpointing</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a 
href="/wiki/Computer_programming" title="Computer programming">Programming</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Stream_processing" title="Stream processing">Stream processing</a></li> <li><a href="/wiki/Dataflow_programming" title="Dataflow programming">Dataflow programming</a></li> <li><a href="/wiki/Parallel_programming_model" title="Parallel programming model">Models</a> <ul><li><a href="/wiki/Implicit_parallelism" title="Implicit parallelism">Implicit parallelism</a></li> <li><a href="/wiki/Explicit_parallelism" title="Explicit parallelism">Explicit parallelism</a></li> <li><a href="/wiki/Concurrency_(computer_science)" title="Concurrency (computer science)">Concurrency</a></li></ul></li> <li><a href="/wiki/Non-blocking_algorithm" title="Non-blocking algorithm">Non-blocking algorithm</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Computer_hardware" title="Computer hardware">Hardware</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Flynn%27s_taxonomy" title="Flynn&#39;s taxonomy">Flynn's taxonomy</a> <ul><li><a href="/wiki/Single_instruction,_single_data" title="Single instruction, single data">SISD</a></li> <li><a href="/wiki/Single_instruction,_multiple_data" title="Single instruction, multiple data">SIMD</a> <ul><li><a href="/wiki/Single_instruction,_multiple_threads" title="Single instruction, multiple threads">Array processing</a> (SIMT)</li> <li><a href="/wiki/Flynn%27s_taxonomy#Pipelined_processor" title="Flynn&#39;s taxonomy">Pipelined processing</a></li> <li><a href="/wiki/Flynn%27s_taxonomy#Associative_processor" title="Flynn&#39;s taxonomy">Associative processing</a></li></ul></li> <li><a href="/wiki/Multiple_instruction,_single_data" title="Multiple instruction, single data">MISD</a></li> <li><a href="/wiki/Multiple_instruction,_multiple_data" title="Multiple instruction, multiple data">MIMD</a></li></ul></li> <li><a href="/wiki/Dataflow_architecture" title="Dataflow architecture">Dataflow architecture</a></li> <li><a href="/wiki/Instruction_pipelining" title="Instruction pipelining">Pipelined processor</a></li> <li><a href="/wiki/Superscalar_processor" title="Superscalar processor">Superscalar processor</a></li> <li><a href="/wiki/Vector_processor" title="Vector processor">Vector processor</a></li> <li><a href="/wiki/Multiprocessing" title="Multiprocessing">Multiprocessor</a> <ul><li><a href="/wiki/Symmetric_multiprocessing" title="Symmetric multiprocessing">symmetric</a></li> <li><a href="/wiki/Asymmetric_multiprocessing" title="Asymmetric multiprocessing">asymmetric</a></li></ul></li> <li><a href="/wiki/Semiconductor_memory" title="Semiconductor memory">Memory</a> <ul><li><a href="/wiki/Shared_memory" title="Shared memory">shared</a></li> <li><a href="/wiki/Distributed_memory" title="Distributed memory">distributed</a></li> <li><a href="/wiki/Distributed_shared_memory" title="Distributed shared memory">distributed shared</a></li> <li><a href="/wiki/Uniform_memory_access" title="Uniform memory access">UMA</a></li> <li><a href="/wiki/Non-uniform_memory_access" title="Non-uniform memory access">NUMA</a></li> <li><a href="/wiki/Cache-only_memory_architecture" title="Cache-only memory architecture">COMA</a></li></ul></li> <li><a href="/wiki/Massively_parallel" title="Massively parallel">Massively parallel</a> 
computer</li> <li><a href="/wiki/Computer_cluster" title="Computer cluster">Computer cluster</a> <ul><li><a href="/wiki/Beowulf_cluster" title="Beowulf cluster">Beowulf cluster</a></li></ul></li> <li><a href="/wiki/Grid_computing" title="Grid computing">Grid computer</a></li> <li><a href="/wiki/Hardware_acceleration" title="Hardware acceleration">Hardware acceleration</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/API" title="API">APIs</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Ateji_PX" title="Ateji PX">Ateji PX</a></li> <li><a href="/wiki/Boost_(C%2B%2B_libraries)" title="Boost (C++ libraries)">Boost</a></li> <li><a href="/wiki/Chapel_(programming_language)" title="Chapel (programming language)">Chapel</a></li> <li><a href="/wiki/HPX" title="HPX">HPX</a></li> <li><a href="/wiki/Charm%2B%2B" title="Charm++">Charm++</a></li> <li><a href="/wiki/Cilk" title="Cilk">Cilk</a></li> <li><a href="/wiki/Coarray_Fortran" title="Coarray Fortran">Coarray Fortran</a></li> <li><a class="mw-selflink selflink">CUDA</a></li> <li><a href="/wiki/Dryad_(programming)" title="Dryad (programming)">Dryad</a></li> <li><a href="/wiki/C%2B%2B_AMP" title="C++ AMP">C++ AMP</a></li> <li><a href="/wiki/Global_Arrays" title="Global Arrays">Global Arrays</a></li> <li><a href="/wiki/GPUOpen" title="GPUOpen">GPUOpen</a></li> <li><a href="/wiki/Message_Passing_Interface" title="Message Passing Interface">MPI</a></li> <li><a href="/wiki/OpenMP" title="OpenMP">OpenMP</a></li> <li><a href="/wiki/OpenCL" title="OpenCL">OpenCL</a></li> <li><a href="/wiki/OpenHMPP" title="OpenHMPP">OpenHMPP</a></li> <li><a href="/wiki/OpenACC" title="OpenACC">OpenACC</a></li> <li><a href="/wiki/Parallel_Extensions" title="Parallel Extensions">Parallel Extensions</a></li> <li><a href="/wiki/Parallel_Virtual_Machine" title="Parallel Virtual Machine">PVM</a></li> <li><a href="/wiki/Pthreads" title="Pthreads">pthreads</a></li> <li><a href="/wiki/RaftLib" title="RaftLib">RaftLib</a></li> <li><a href="/wiki/ROCm" title="ROCm">ROCm</a></li> <li><a href="/wiki/Unified_Parallel_C" title="Unified Parallel C">UPC</a></li> <li><a href="/wiki/Threading_Building_Blocks" title="Threading Building Blocks">TBB</a></li> <li><a href="/wiki/ZPL_(programming_language)" class="mw-redirect" title="ZPL (programming language)">ZPL</a></li></ul> </div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Problems</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em"> <ul><li><a href="/wiki/Automatic_parallelization" title="Automatic parallelization">Automatic parallelization</a></li> <li><a href="/wiki/Deadlock_(computer_science)" title="Deadlock (computer science)">Deadlock</a></li> <li><a href="/wiki/Deterministic_algorithm" title="Deterministic algorithm">Deterministic algorithm</a></li> <li><a href="/wiki/Embarrassingly_parallel" title="Embarrassingly parallel">Embarrassingly parallel</a></li> <li><a href="/wiki/Parallel_slowdown" title="Parallel slowdown">Parallel slowdown</a></li> <li><a href="/wiki/Race_condition" title="Race condition">Race condition</a></li> <li><a href="/wiki/Software_lockout" title="Software lockout">Software lockout</a></li> <li><a href="/wiki/Scalability" title="Scalability">Scalability</a></li> <li><a href="/wiki/Starvation_(computer_science)" title="Starvation (computer 
science)">Starvation</a></li></ul> </div></td></tr><tr><td class="navbox-abovebelow" colspan="2"><div> <ul><li><span class="noviewer" typeof="mw:File"><span title="Category"><img alt="" src="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png" decoding="async" width="16" height="16" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/23px-Symbol_category_class.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/31px-Symbol_category_class.svg.png 2x" data-file-width="180" data-file-height="185" /></span></span>&#160;<a href="/wiki/Category:Parallel_computing" title="Category:Parallel computing">Category: Parallel computing</a></li></ul> </div></td></tr></tbody></table></div> <div class="navbox-styles"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374" /><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236075235" /></div><div role="navigation" class="navbox authority-control" aria-label="Navbox582" style="padding:3px"><table class="nowraplinks hlist navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Help:Authority_control" title="Help:Authority control">Authority control databases</a>: National <span class="mw-valign-text-top noprint" typeof="mw:File/Frameless"><a href="https://www.wikidata.org/wiki/Q477690#identifiers" title="Edit this at Wikidata"><img alt="Edit this at Wikidata" src="//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png" decoding="async" width="10" height="10" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/15px-OOjs_UI_icon_edit-ltr-progressive.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/20px-OOjs_UI_icon_edit-ltr-progressive.svg.png 2x" data-file-width="20" data-file-height="20" /></a></span></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"><ul><li><span class="uid"><a rel="nofollow" class="external text" href="https://d-nb.info/gnd/7719528-0">Germany</a></span></li><li><span class="uid"><a rel="nofollow" class="external text" href="https://id.loc.gov/authorities/sh2013001284">United States</a></span></li><li><span class="uid"><a rel="nofollow" class="external text" href="https://www.nli.org.il/en/authorities/987007568350905171">Israel</a></span></li></ul></div></td></tr></tbody></table></div> <!-- NewPP limit report Parsed by mw‐api‐ext.eqiad.main‐79c68c7457‐wm95s Cached time: 20250324195919 Cache expiry: 619246 Reduced expiry: true Complications: [vary‐revision‐sha1, show‐toc] CPU time usage: 1.410 seconds Real time usage: 1.700 seconds Preprocessor visited node count: 12158/1000000 Post‐expand include size: 397876/2097152 bytes Template argument size: 15508/2097152 bytes Highest expansion depth: 26/100 Expensive parser function count: 15/500 Unstrip recursion depth: 1/20 Unstrip post‐expand size: 387571/5000000 bytes Lua time usage: 0.643/10.000 seconds Lua memory usage: 7446479/52428800 bytes Number of Wikibase entities loaded: 1/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 1319.394 1 -total 38.82% 512.186 1 Template:Reflist 24.70% 325.846 73 Template:Cite_web 12.32% 
162.536 12 Template:Navbox 10.59% 139.680 2 Template:Infobox 8.65% 114.190 1 Template:Infobox_software 8.25% 108.820 1 Template:Nvidia 8.07% 106.463 1 Template:Navbox_with_collapsible_groups 7.63% 100.632 1 Template:Short_description 7.59% 100.153 1 Template:Multiple_issues --> <!-- Saved in parser cache with key enwiki:pcache:7933386:|#|:idhash:canonical and timestamp 20250324195919 and revision id 1282172238. Rendering was triggered because: api-parse --> </div><!--esi <esi:include src="/esitest-fa8a495983347898/content" /> --><noscript><img src="https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?useformat=desktop&amp;type=1x1&amp;usesul3=0" alt="" width="1" height="1" style="border: none; position: absolute;"></noscript> <div class="printfooter" data-nosnippet="">Retrieved from "<a dir="ltr" href="https://en.wikipedia.org/w/index.php?title=CUDA&amp;oldid=1282172238">https://en.wikipedia.org/w/index.php?title=CUDA&amp;oldid=1282172238</a>"</div></div> <div id="catlinks" class="catlinks" data-mw="interface"><div id="mw-normal-catlinks" class="mw-normal-catlinks"><a href="/wiki/Help:Category" title="Help:Category">Categories</a>: <ul><li><a href="/wiki/Category:Computer_physics_engines" title="Category:Computer physics engines">Computer physics engines</a></li><li><a href="/wiki/Category:GPGPU" title="Category:GPGPU">GPGPU</a></li><li><a href="/wiki/Category:GPGPU_libraries" title="Category:GPGPU libraries">GPGPU libraries</a></li><li><a href="/wiki/Category:Graphics_hardware" title="Category:Graphics hardware">Graphics hardware</a></li><li><a href="/wiki/Category:Nvidia_software" title="Category:Nvidia software">Nvidia software</a></li><li><a href="/wiki/Category:Parallel_computing" title="Category:Parallel computing">Parallel computing</a></li><li><a href="/wiki/Category:Graphics_cards" title="Category:Graphics cards">Graphics cards</a></li><li><a href="/wiki/Category:Video_game_hardware" title="Category:Video game hardware">Video game hardware</a></li></ul></div><div id="mw-hidden-catlinks" class="mw-hidden-catlinks mw-hidden-cats-hidden">Hidden categories: <ul><li><a href="/wiki/Category:Articles_with_short_description" title="Category:Articles with short description">Articles with short description</a></li><li><a href="/wiki/Category:Short_description_matches_Wikidata" title="Category:Short description matches Wikidata">Short description matches Wikidata</a></li><li><a href="/wiki/Category:Articles_needing_cleanup_from_February_2024" title="Category:Articles needing cleanup from February 2024">Articles needing cleanup from February 2024</a></li><li><a href="/wiki/Category:All_pages_needing_cleanup" title="Category:All pages needing cleanup">All pages needing cleanup</a></li><li><a href="/wiki/Category:Articles_containing_how-to_sections" title="Category:Articles containing how-to sections">Articles containing how-to sections</a></li><li><a href="/wiki/Category:Articles_containing_pro_and_con_lists" title="Category:Articles containing pro and con lists">Articles containing pro and con lists</a></li><li><a href="/wiki/Category:Wikipedia_articles_with_style_issues_from_February_2024" title="Category:Wikipedia articles with style issues from February 2024">Wikipedia articles with style issues from February 2024</a></li><li><a href="/wiki/Category:All_articles_with_style_issues" title="Category:All articles with style issues">All articles with style issues</a></li><li><a href="/wiki/Category:Articles_with_multiple_maintenance_issues" title="Category:Articles with multiple 
maintenance issues">Articles with multiple maintenance issues</a></li><li><a href="/wiki/Category:Wikipedia_articles_in_need_of_updating_from_December_2022" title="Category:Wikipedia articles in need of updating from December 2022">Wikipedia articles in need of updating from December 2022</a></li><li><a href="/wiki/Category:All_Wikipedia_articles_in_need_of_updating" title="Category:All Wikipedia articles in need of updating">All Wikipedia articles in need of updating</a></li><li><a href="/wiki/Category:All_articles_with_unsourced_statements" title="Category:All articles with unsourced statements">All articles with unsourced statements</a></li><li><a href="/wiki/Category:Articles_with_unsourced_statements_from_May_2016" title="Category:Articles with unsourced statements from May 2016">Articles with unsourced statements from May 2016</a></li></ul></div></div> </div> </main> </div> <div class="mw-footer-container"> <footer id="footer" class="mw-footer" > <ul id="footer-info"> <li id="footer-info-lastmod"> This page was last edited on 24 March 2025, at 19:59<span class="anonymous-show">&#160;(UTC)</span>.</li> <li id="footer-info-copyright">Text is available under the <a href="/wiki/Wikipedia:Text_of_the_Creative_Commons_Attribution-ShareAlike_4.0_International_License" title="Wikipedia:Text of the Creative Commons Attribution-ShareAlike 4.0 International License">Creative Commons Attribution-ShareAlike 4.0 License</a>; additional terms may apply. By using this site, you agree to the <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use" class="extiw" title="foundation:Special:MyLanguage/Policy:Terms of Use">Terms of Use</a> and <a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy" class="extiw" title="foundation:Special:MyLanguage/Policy:Privacy policy">Privacy Policy</a>. 
Wikipedia® is a registered trademark of the <a rel="nofollow" class="external text" href="https://wikimediafoundation.org/">Wikimedia Foundation, Inc.</a>, a non-profit organization.</li> </ul> <ul id="footer-places"> <li id="footer-places-privacy"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy">Privacy policy</a></li> <li id="footer-places-about"><a href="/wiki/Wikipedia:About">About Wikipedia</a></li> <li id="footer-places-disclaimers"><a href="/wiki/Wikipedia:General_disclaimer">Disclaimers</a></li> <li id="footer-places-contact"><a href="//en.wikipedia.org/wiki/Wikipedia:Contact_us">Contact Wikipedia</a></li> <li id="footer-places-wm-codeofconduct"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Universal_Code_of_Conduct">Code of Conduct</a></li> <li id="footer-places-developers"><a href="https://developer.wikimedia.org">Developers</a></li> <li id="footer-places-statslink"><a href="https://stats.wikimedia.org/#/en.wikipedia.org">Statistics</a></li> <li id="footer-places-cookiestatement"><a href="https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Cookie_statement">Cookie statement</a></li> <li id="footer-places-mobileview"><a href="//en.m.wikipedia.org/w/index.php?title=CUDA&amp;mobileaction=toggle_view_mobile" class="noprint stopMobileRedirectToggle">Mobile view</a></li> </ul> <ul id="footer-icons" class="noprint"> <li id="footer-copyrightico"><a href="https://www.wikimedia.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><picture><source media="(min-width: 500px)" srcset="/static/images/footer/wikimedia-button.svg" width="84" height="29"><img src="/static/images/footer/wikimedia.svg" width="25" height="25" alt="Wikimedia Foundation" lang="en" loading="lazy"></picture></a></li> <li id="footer-poweredbyico"><a href="https://www.mediawiki.org/" class="cdx-button cdx-button--fake-button cdx-button--size-large cdx-button--fake-button--enabled"><picture><source media="(min-width: 500px)" srcset="/w/resources/assets/poweredby_mediawiki.svg" width="88" height="31"><img src="/w/resources/assets/mediawiki_compact.svg" alt="Powered by MediaWiki" lang="en" width="25" height="25" loading="lazy"></picture></a></li> </ul> </footer> </div> </div> </div> <div class="vector-header-container vector-sticky-header-container"> <div id="vector-sticky-header" class="vector-sticky-header"> <div class="vector-sticky-header-start"> <div class="vector-sticky-header-icon-start vector-button-flush-left vector-button-flush-right" aria-hidden="true"> <button class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-sticky-header-search-toggle" tabindex="-1" data-event-name="ui.vector-sticky-search-form.icon"><span class="vector-icon mw-ui-icon-search mw-ui-icon-wikimedia-search"></span> <span>Search</span> </button> </div> <div role="search" class="vector-search-box-vue vector-search-box-show-thumbnail vector-search-box"> <div class="vector-typeahead-search-container"> <div class="cdx-typeahead-search cdx-typeahead-search--show-thumbnail"> <form action="/w/index.php" id="vector-sticky-search-form" class="cdx-search-input cdx-search-input--has-end-button"> <div class="cdx-search-input__input-wrapper" data-search-loc="header-moved"> <div class="cdx-text-input cdx-text-input--has-start-icon"> <input class="cdx-text-input__input" type="search" name="search" placeholder="Search Wikipedia"> <span class="cdx-text-input__icon cdx-text-input__start-icon"></span> </div> <input 
type="hidden" name="title" value="Special:Search"> </div> <button class="cdx-button cdx-search-input__end-button">Search</button> </form> </div> </div> </div> <div class="vector-sticky-header-context-bar"> <nav aria-label="Contents" class="vector-toc-landmark"> <div id="vector-sticky-header-toc" class="vector-dropdown mw-portlet mw-portlet-sticky-header-toc vector-sticky-header-toc vector-button-flush-left" > <input type="checkbox" id="vector-sticky-header-toc-checkbox" role="button" aria-haspopup="true" data-event-name="ui.dropdown-vector-sticky-header-toc" class="vector-dropdown-checkbox " aria-label="Toggle the table of contents" > <label id="vector-sticky-header-toc-label" for="vector-sticky-header-toc-checkbox" class="vector-dropdown-label cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only " aria-hidden="true" ><span class="vector-icon mw-ui-icon-listBullet mw-ui-icon-wikimedia-listBullet"></span> <span class="vector-dropdown-label-text">Toggle the table of contents</span> </label> <div class="vector-dropdown-content"> <div id="vector-sticky-header-toc-unpinned-container" class="vector-unpinned-container"> </div> </div> </div> </nav> <div class="vector-sticky-header-context-bar-primary" aria-hidden="true" ><span class="mw-page-title-main">CUDA</span></div> </div> </div> <div class="vector-sticky-header-end" aria-hidden="true"> <div class="vector-sticky-header-icons"> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-talk-sticky-header" tabindex="-1" data-event-name="talk-sticky-header"><span class="vector-icon mw-ui-icon-speechBubbles mw-ui-icon-wikimedia-speechBubbles"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-subject-sticky-header" tabindex="-1" data-event-name="subject-sticky-header"><span class="vector-icon mw-ui-icon-article mw-ui-icon-wikimedia-article"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-history-sticky-header" tabindex="-1" data-event-name="history-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-history mw-ui-icon-wikimedia-wikimedia-history"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only mw-watchlink" id="ca-watchstar-sticky-header" tabindex="-1" data-event-name="watch-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-star mw-ui-icon-wikimedia-wikimedia-star"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-edit-sticky-header" tabindex="-1" data-event-name="wikitext-edit-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-wikiText mw-ui-icon-wikimedia-wikimedia-wikiText"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-ve-edit-sticky-header" tabindex="-1" data-event-name="ve-edit-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-edit mw-ui-icon-wikimedia-wikimedia-edit"></span> <span></span> </a> <a href="#" class="cdx-button cdx-button--fake-button 
cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--icon-only" id="ca-viewsource-sticky-header" tabindex="-1" data-event-name="ve-edit-protected-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-editLock mw-ui-icon-wikimedia-wikimedia-editLock"></span> <span></span> </a> </div> <div class="vector-sticky-header-buttons"> <button class="cdx-button cdx-button--weight-quiet mw-interlanguage-selector" id="p-lang-btn-sticky-header" tabindex="-1" data-event-name="ui.dropdown-p-lang-btn-sticky-header"><span class="vector-icon mw-ui-icon-wikimedia-language mw-ui-icon-wikimedia-wikimedia-language"></span> <span>34 languages</span> </button> <a href="#" class="cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive" id="ca-addsection-sticky-header" tabindex="-1" data-event-name="addsection-sticky-header"><span class="vector-icon mw-ui-icon-speechBubbleAdd-progressive mw-ui-icon-wikimedia-speechBubbleAdd-progressive"></span> <span>Add topic</span> </a> </div> <div class="vector-sticky-header-icon-end"> <div class="vector-user-links"> </div> </div> </div> </div> </div> <div class="mw-portlet mw-portlet-dock-bottom emptyPortlet" id="p-dock-bottom"> <ul> </ul> </div> <script>(RLQ=window.RLQ||[]).push(function(){mw.config.set({"wgHostname":"mw-web.eqiad.main-9fdbc9b66-dzg6d","wgBackendResponseTime":408,"wgPageParseReport":{"limitreport":{"cputime":"1.410","walltime":"1.700","ppvisitednodes":{"value":12158,"limit":1000000},"postexpandincludesize":{"value":397876,"limit":2097152},"templateargumentsize":{"value":15508,"limit":2097152},"expansiondepth":{"value":26,"limit":100},"expensivefunctioncount":{"value":15,"limit":500},"unstrip-depth":{"value":1,"limit":20},"unstrip-size":{"value":387571,"limit":5000000},"entityaccesscount":{"value":1,"limit":400},"timingprofile":["100.00% 1319.394 1 -total"," 38.82% 512.186 1 Template:Reflist"," 24.70% 325.846 73 Template:Cite_web"," 12.32% 162.536 12 Template:Navbox"," 10.59% 139.680 2 Template:Infobox"," 8.65% 114.190 1 Template:Infobox_software"," 8.25% 108.820 1 Template:Nvidia"," 8.07% 106.463 1 Template:Navbox_with_collapsible_groups"," 7.63% 100.632 1 Template:Short_description"," 7.59% 100.153 1 Template:Multiple_issues"]},"scribunto":{"limitreport-timeusage":{"value":"0.643","limit":"10.000"},"limitreport-memusage":{"value":7446479,"limit":52428800}},"cachereport":{"origin":"mw-api-ext.eqiad.main-79c68c7457-wm95s","timestamp":"20250324195919","ttl":619246,"transientcontent":true}}});});</script> <script type="application/ld+json">{"@context":"https:\/\/schema.org","@type":"Article","name":"CUDA","url":"https:\/\/en.wikipedia.org\/wiki\/CUDA","sameAs":"http:\/\/www.wikidata.org\/entity\/Q477690","mainEntity":"http:\/\/www.wikidata.org\/entity\/Q477690","author":{"@type":"Organization","name":"Contributors to Wikimedia projects"},"publisher":{"@type":"Organization","name":"Wikimedia Foundation, Inc.","logo":{"@type":"ImageObject","url":"https:\/\/www.wikimedia.org\/static\/images\/wmf-hor-googpub.png"}},"datePublished":"2006-11-14T13:12:46Z","dateModified":"2025-03-24T19:59:08Z","image":"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/b\/b9\/Nvidia_CUDA_Logo.jpg","headline":"parallel computing platform and programming model"}</script> </body> </html>
